Tighten consistency, explainability, and deterministic exports for public records Space
Browse files- __pycache__/public_space_app.cpython-311.pyc +2 -2
- dataset_bundle/evidence_audit/consistency_report.json +1 -1
- dataset_bundle/graph_links.csv +0 -0
- dataset_bundle/network_graph/edges.csv +0 -0
- dataset_bundle/network_graph/graph_config.json +5 -4
- dataset_bundle/public_release_manifest.json +1 -1
- public_copy.json +1 -1
- public_space_app.py +488 -89
__pycache__/public_space_app.cpython-311.pyc
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b7fd4155ab406455d61abeab73be9fdb3d4f85e5ebdb4a63b9cdbe78f832505
|
| 3 |
+
size 170984
|
dataset_bundle/evidence_audit/consistency_report.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"generated_at": "2026-04-
|
| 3 |
"event_provenance": {
|
| 4 |
"event_count": 3918,
|
| 5 |
"events_with_artifacts": 3878,
|
|
|
|
| 1 |
{
|
| 2 |
+
"generated_at": "2026-04-19T19:38:57-04:00",
|
| 3 |
"event_provenance": {
|
| 4 |
"event_count": 3918,
|
| 5 |
"events_with_artifacts": 3878,
|
dataset_bundle/graph_links.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
dataset_bundle/network_graph/edges.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
dataset_bundle/network_graph/graph_config.json
CHANGED
|
@@ -14,15 +14,16 @@
|
|
| 14 |
"unresolved": 1808,
|
| 15 |
"linked": 1117,
|
| 16 |
"needs_review": 119,
|
| 17 |
-
"acceptable_with_label":
|
| 18 |
-
"release_ok":
|
| 19 |
},
|
| 20 |
"default_filters": {
|
| 21 |
"relationship_family": "sector",
|
| 22 |
"review_status": "stronger",
|
| 23 |
-
"max_edges":
|
| 24 |
"hide_unresolved_only": true,
|
| 25 |
-
"overview_member_limit":
|
|
|
|
| 26 |
},
|
| 27 |
"example_member_searches": [
|
| 28 |
"Josh Gottheimer",
|
|
|
|
| 14 |
"unresolved": 1808,
|
| 15 |
"linked": 1117,
|
| 16 |
"needs_review": 119,
|
| 17 |
+
"acceptable_with_label": 1174,
|
| 18 |
+
"release_ok": 339
|
| 19 |
},
|
| 20 |
"default_filters": {
|
| 21 |
"relationship_family": "sector",
|
| 22 |
"review_status": "stronger",
|
| 23 |
+
"max_edges": 30,
|
| 24 |
"hide_unresolved_only": true,
|
| 25 |
+
"overview_member_limit": 5,
|
| 26 |
+
"default_member_search": "Josh Gottheimer"
|
| 27 |
},
|
| 28 |
"example_member_searches": [
|
| 29 |
"Josh Gottheimer",
|
dataset_bundle/public_release_manifest.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"public_version": "congress-public-records-slice-2026-04-v1",
|
| 3 |
"title": "Congress Public Records Slice",
|
| 4 |
-
"release_date": "2026-04-
|
| 5 |
"slice_description": "A neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.",
|
| 6 |
"source_run_name": "house_all_baseline_20260418_v21_recipienthardening",
|
| 7 |
"dataset_repo_id": "cjc0013/cmp-data",
|
|
|
|
| 1 |
{
|
| 2 |
"public_version": "congress-public-records-slice-2026-04-v1",
|
| 3 |
"title": "Congress Public Records Slice",
|
| 4 |
+
"release_date": "2026-04-19T19:40:15-04:00",
|
| 5 |
"slice_description": "A neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.",
|
| 6 |
"source_run_name": "house_all_baseline_20260418_v21_recipienthardening",
|
| 7 |
"dataset_repo_id": "cjc0013/cmp-data",
|
public_copy.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"public_version": "congress-public-records-slice-2026-04-v1
|
| 3 |
"title": "Congress Public Records Slice",
|
| 4 |
"subtitle": "Neutral Records explorer for a public-record slice of congressional money-and-power linkages.",
|
| 5 |
"dataset_repo_id": "cjc0013/cmp-data",
|
|
|
|
| 1 |
{
|
| 2 |
+
"public_version": "congress-public-records-slice-2026-04-v1",
|
| 3 |
"title": "Congress Public Records Slice",
|
| 4 |
"subtitle": "Neutral Records explorer for a public-record slice of congressional money-and-power linkages.",
|
| 5 |
"dataset_repo_id": "cjc0013/cmp-data",
|
public_space_app.py
CHANGED
|
@@ -899,31 +899,353 @@ def _stronger_support_count(row: Dict[str, Any]) -> int:
|
|
| 899 |
)
|
| 900 |
|
| 901 |
|
| 902 |
-
|
| 903 |
-
|
| 904 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 905 |
chips: list[str] = []
|
| 906 |
-
|
| 907 |
-
|
| 908 |
-
|
| 909 |
-
|
| 910 |
-
|
| 911 |
-
|
| 912 |
-
|
| 913 |
-
|
| 914 |
-
|
| 915 |
-
|
| 916 |
-
|
| 917 |
-
|
| 918 |
-
|
| 919 |
-
|
| 920 |
-
if
|
| 921 |
-
|
| 922 |
-
|
| 923 |
-
|
| 924 |
-
|
| 925 |
-
|
| 926 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 927 |
|
| 928 |
|
| 929 |
def _window_overlap_text(row: Dict[str, Any]) -> str:
|
|
@@ -1034,7 +1356,7 @@ def _rank_relationships(edges: pd.DataFrame, ranking_mode: str = "raw") -> pd.Da
|
|
| 1034 |
{
|
| 1035 |
"relationship_id": str(row.get("edge_id") or ""),
|
| 1036 |
"member": str(row.get("member_name") or row.get("member_slug") or ""),
|
| 1037 |
-
"counterparty / sector":
|
| 1038 |
"overall score": display_score,
|
| 1039 |
"raw score": raw_score,
|
| 1040 |
"relative score": relative_score,
|
|
@@ -1188,19 +1510,26 @@ def _select_edge_row(edges: pd.DataFrame, relationship_id: str) -> Dict[str, Any
|
|
| 1188 |
return matched.head(1).to_dict("records")[0]
|
| 1189 |
|
| 1190 |
|
| 1191 |
-
def _relationship_detail_markdown(
|
| 1192 |
-
|
| 1193 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1194 |
return "Select a relationship to inspect why it appears in this released slice."
|
|
|
|
| 1195 |
family = str(row.get("relationship_family", "") or "")
|
| 1196 |
-
chips =
|
| 1197 |
-
reason_codes = [
|
| 1198 |
-
urls =
|
| 1199 |
-
raw_score =
|
| 1200 |
-
relative_score =
|
| 1201 |
-
display_score =
|
| 1202 |
lines = [
|
| 1203 |
-
f"### {row.get('member_name') or row.get('member_slug')} -> {
|
| 1204 |
"",
|
| 1205 |
f"- Relationship view: `{_plain_family_label(family)}`",
|
| 1206 |
f"- Strength label: `{_plain_status_label(str(row.get('relationship_status', '') or ''))}`",
|
|
@@ -1210,14 +1539,22 @@ def _relationship_detail_markdown(edges: pd.DataFrame, relationship_id: str, ran
|
|
| 1210 |
f"- Supporting relationship rows: `{int(row.get('link_count', 0) or 0)}`",
|
| 1211 |
f"- Stronger-support rows: `{int(row.get('linked_count', 0) or 0) if family == 'recipient' else int(row.get('strong_event_count', 0) or 0)}`",
|
| 1212 |
f"- Caution / weaker rows: `{int(row.get('review_count', 0) or 0) if family == 'recipient' else int(row.get('weak_event_count', 0) or 0)}`",
|
| 1213 |
-
f"- Integrity-checked source records attached: `{int(
|
| 1214 |
f"- Unresolved source refs still counted: `{int(row.get('unresolved_source_ref_count', 0) or 0)}`",
|
| 1215 |
f"- Evidence signals: `{', '.join(chips) if chips else 'published source support'}`",
|
| 1216 |
f"- Time-window overlap: `{_window_overlap_text(row)}`",
|
| 1217 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1218 |
if reason_codes:
|
| 1219 |
lines.extend(["", "#### Why it is linked in this slice", ""])
|
| 1220 |
lines.extend(f"- {item}" for item in reason_codes)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1221 |
if urls:
|
| 1222 |
lines.extend(["", "#### Example published source URLs", ""])
|
| 1223 |
lines.extend(f"- [{item}]({item})" for item in urls)
|
|
@@ -1227,17 +1564,12 @@ def _relationship_detail_markdown(edges: pd.DataFrame, relationship_id: str, ran
|
|
| 1227 |
"#### Integrity note",
|
| 1228 |
"",
|
| 1229 |
"- `Integrity-checked` means the release includes a cryptographic fingerprint to help show a published record has not been altered.",
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1230 |
]
|
| 1231 |
)
|
| 1232 |
-
if str(ranking_mode or "raw").strip().lower() == "relative":
|
| 1233 |
-
lines.extend(
|
| 1234 |
-
[
|
| 1235 |
-
"",
|
| 1236 |
-
"#### Ranking note",
|
| 1237 |
-
"",
|
| 1238 |
-
"- This view is using the experimental relative score, which compares this relationship to the same member's other visible links in the current filtered view.",
|
| 1239 |
-
]
|
| 1240 |
-
)
|
| 1241 |
return "\n".join(lines)
|
| 1242 |
|
| 1243 |
|
|
@@ -1246,15 +1578,20 @@ def _safe_export_stem(value: str) -> str:
|
|
| 1246 |
return slug or "relationship-export"
|
| 1247 |
|
| 1248 |
|
| 1249 |
-
def _relationship_export_rows(
|
| 1250 |
-
|
| 1251 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1252 |
return []
|
| 1253 |
-
|
| 1254 |
-
|
| 1255 |
-
|
| 1256 |
-
|
| 1257 |
-
urls = _split_pipe_values(row.get("source_urls", ""), limit=8)
|
| 1258 |
export_rows: list[dict[str, Any]] = [
|
| 1259 |
{
|
| 1260 |
"relationship_id": str(row.get("edge_id") or ""),
|
|
@@ -1271,7 +1608,24 @@ def _relationship_export_rows(edges: pd.DataFrame, relationship_id: str, ranking
|
|
| 1271 |
"item_detail": "Top-level relationship summary for export.",
|
| 1272 |
}
|
| 1273 |
]
|
| 1274 |
-
for
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1275 |
export_rows.append(
|
| 1276 |
{
|
| 1277 |
"relationship_id": str(row.get("edge_id") or ""),
|
|
@@ -1288,7 +1642,7 @@ def _relationship_export_rows(edges: pd.DataFrame, relationship_id: str, ranking
|
|
| 1288 |
"item_detail": _evidence_chip_help(chip),
|
| 1289 |
}
|
| 1290 |
)
|
| 1291 |
-
for reason in
|
| 1292 |
export_rows.append(
|
| 1293 |
{
|
| 1294 |
"relationship_id": str(row.get("edge_id") or ""),
|
|
@@ -1305,7 +1659,7 @@ def _relationship_export_rows(edges: pd.DataFrame, relationship_id: str, ranking
|
|
| 1305 |
"item_detail": reason,
|
| 1306 |
}
|
| 1307 |
)
|
| 1308 |
-
for item in
|
| 1309 |
export_rows.append(
|
| 1310 |
{
|
| 1311 |
"relationship_id": str(row.get("edge_id") or ""),
|
|
@@ -1322,7 +1676,7 @@ def _relationship_export_rows(edges: pd.DataFrame, relationship_id: str, ranking
|
|
| 1322 |
"item_detail": item,
|
| 1323 |
}
|
| 1324 |
)
|
| 1325 |
-
for url in
|
| 1326 |
export_rows.append(
|
| 1327 |
{
|
| 1328 |
"relationship_id": str(row.get("edge_id") or ""),
|
|
@@ -1342,14 +1696,21 @@ def _relationship_export_rows(edges: pd.DataFrame, relationship_id: str, ranking
|
|
| 1342 |
return export_rows
|
| 1343 |
|
| 1344 |
|
| 1345 |
-
def _relationship_handoff_rows(
|
| 1346 |
-
|
| 1347 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1348 |
return []
|
|
|
|
| 1349 |
family = str(row.get("relationship_family", "") or "")
|
| 1350 |
-
raw_score =
|
| 1351 |
-
relative_score =
|
| 1352 |
-
display_score =
|
| 1353 |
handoff_rows: list[dict[str, Any]] = []
|
| 1354 |
|
| 1355 |
def add(section: str, label: str, explanation: str, source_url: str = "") -> None:
|
|
@@ -1363,34 +1724,52 @@ def _relationship_handoff_rows(edges: pd.DataFrame, relationship_id: str, rankin
|
|
| 1363 |
)
|
| 1364 |
|
| 1365 |
add("Summary", "Member", str(row.get("member_name") or row.get("member_slug") or ""))
|
| 1366 |
-
add("Summary", "Target",
|
| 1367 |
add("Summary", "Relationship view", _plain_family_label(family))
|
| 1368 |
add("Summary", "Strength label", _plain_status_label(str(row.get("relationship_status", "") or "")))
|
| 1369 |
add("Summary", "Displayed score", str(display_score))
|
| 1370 |
add("Summary", "Raw score", str(raw_score))
|
| 1371 |
add("Summary", "Relative-to-baseline score (experimental)", str(relative_score))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1372 |
add("Summary", "Supporting relationship rows", str(int(row.get("link_count", 0) or 0)))
|
| 1373 |
add("Summary", "Stronger-support rows", str(_stronger_support_count(row)))
|
| 1374 |
add("Summary", "Caution / weaker rows", str(int(row.get("review_count", 0) or 0) if family == "recipient" else int(row.get("weak_event_count", 0) or 0)))
|
| 1375 |
-
add("Summary", "Integrity-checked source records attached", str(int(
|
| 1376 |
add("Summary", "Unresolved source refs still counted", str(int(row.get("unresolved_source_ref_count", 0) or 0)))
|
| 1377 |
add("Summary", "Evidence window", _window_overlap_text(row))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1378 |
|
| 1379 |
-
for chip in
|
| 1380 |
add("Evidence signals", chip.title(), _evidence_chip_help(chip))
|
| 1381 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1382 |
add("Why this link appears", reason, reason)
|
| 1383 |
-
for item in
|
| 1384 |
add("What would strengthen it", "Needs stronger support", item)
|
| 1385 |
-
for url in
|
| 1386 |
add("Published source URLs", urlparse(url).netloc or "Published source URL", "Open this published record directly.", url)
|
| 1387 |
return handoff_rows
|
| 1388 |
|
| 1389 |
|
| 1390 |
def _write_relationship_export_bundle(
|
| 1391 |
-
edges: pd.DataFrame,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1392 |
) -> tuple[str, str | None, str | None, str | None]:
|
| 1393 |
-
export_rows = _relationship_export_rows(edges, relationship_id, ranking_mode)
|
| 1394 |
if not export_rows:
|
| 1395 |
return "Pick one relationship to generate exportable evidence files.", None, None, None
|
| 1396 |
relationship_id_value = str(export_rows[0]["relationship_id"] or relationship_id)
|
|
@@ -1421,7 +1800,7 @@ def _write_relationship_export_bundle(
|
|
| 1421 |
for export_row in export_rows:
|
| 1422 |
writer.writerow({name: export_row.get(name, "") for name in fieldnames})
|
| 1423 |
|
| 1424 |
-
handoff_rows = _relationship_handoff_rows(edges, relationship_id, ranking_mode)
|
| 1425 |
handoff_fieldnames = ["section", "label", "explanation", "source_url"]
|
| 1426 |
with handoff_csv_path.open("w", encoding="utf-8", newline="") as handle:
|
| 1427 |
writer = csv.DictWriter(handle, fieldnames=handoff_fieldnames)
|
|
@@ -1429,7 +1808,8 @@ def _write_relationship_export_bundle(
|
|
| 1429 |
for export_row in handoff_rows:
|
| 1430 |
writer.writerow({name: export_row.get(name, "") for name in handoff_fieldnames})
|
| 1431 |
|
| 1432 |
-
|
|
|
|
| 1433 |
pdf = canvas.Canvas(str(pdf_path), pagesize=LETTER, invariant=1)
|
| 1434 |
width, height = LETTER
|
| 1435 |
left = 54
|
|
@@ -1488,20 +1868,27 @@ def _timeline_window_from_url(url: str) -> tuple[int, str, str]:
|
|
| 1488 |
return (60, "Published source", urlparse(normalized).netloc if normalized.startswith("http") else "Published source")
|
| 1489 |
|
| 1490 |
|
| 1491 |
-
def _relationship_timeline_html(
|
| 1492 |
-
|
| 1493 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1494 |
return "<div style=\"padding: 1rem; border: 1px solid #d6d0c4; background: #fffdf8; color: #3a3a3a;\">Choose a relationship to see its evidence window.</div>"
|
|
|
|
| 1495 |
entries: list[tuple[int, str, str, str]] = []
|
| 1496 |
seen: set[tuple[str, str, str]] = set()
|
| 1497 |
-
for url in
|
| 1498 |
sort_key, window_label, track_label = _timeline_window_from_url(url)
|
| 1499 |
detail = url
|
| 1500 |
dedupe_key = (window_label, track_label, detail)
|
| 1501 |
if dedupe_key not in seen:
|
| 1502 |
seen.add(dedupe_key)
|
| 1503 |
entries.append((sort_key, window_label, track_label, detail))
|
| 1504 |
-
if
|
| 1505 |
entries.append((70, "Undated support", "Member profile support", "Profile-based support is included in this relationship summary."))
|
| 1506 |
if int(row.get("unresolved_source_ref_count", 0) or 0) > 0:
|
| 1507 |
entries.append((80, "Partly unresolved", "Some official references remain unresolved", f"{int(row.get('unresolved_source_ref_count', 0) or 0)} unresolved refs are still counted in this released row."))
|
|
@@ -1614,7 +2001,15 @@ def _filter_graph(
|
|
| 1614 |
if score_label != "all":
|
| 1615 |
filtered = filtered[filtered["score_labels"].fillna("").str.contains(score_label, case=False, na=False)]
|
| 1616 |
if review_status == "stronger":
|
| 1617 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1618 |
elif review_status != "all":
|
| 1619 |
filtered = filtered[filtered["relationship_status"] == review_status]
|
| 1620 |
if hide_unresolved_only:
|
|
@@ -1708,11 +2103,14 @@ def _render_graph(nodes: pd.DataFrame, edges: pd.DataFrame) -> str:
|
|
| 1708 |
if not node:
|
| 1709 |
continue
|
| 1710 |
node_type = str(node.get("node_type", ""))
|
| 1711 |
-
|
|
|
|
|
|
|
|
|
|
| 1712 |
role_label = {
|
| 1713 |
"member": "House member",
|
| 1714 |
"recipient": "Funding recipient",
|
| 1715 |
-
"sector": "
|
| 1716 |
}.get(node_type, node_type.title())
|
| 1717 |
title_lines.append(f"Role: {html.escape(role_label)}")
|
| 1718 |
if node_type == "member":
|
|
@@ -1723,7 +2121,7 @@ def _render_graph(nodes: pd.DataFrame, edges: pd.DataFrame) -> str:
|
|
| 1723 |
title_lines.append(f"Released relationships in graph data: {int(node.get('connected_edge_count', 0) or 0)}")
|
| 1724 |
network.add_node(
|
| 1725 |
node_id,
|
| 1726 |
-
label=
|
| 1727 |
title="<br>".join(title_lines),
|
| 1728 |
color=color_map.get(str(node.get("node_type", "")), "#6e6e6e"),
|
| 1729 |
shape="dot",
|
|
@@ -1741,7 +2139,7 @@ def _render_graph(nodes: pd.DataFrame, edges: pd.DataFrame) -> str:
|
|
| 1741 |
if ":" in part
|
| 1742 |
]
|
| 1743 |
title_lines = [
|
| 1744 |
-
f"<b>{html.escape(str(row.get('member_name', '') or row.get('member_slug', '')))} -> {html.escape(
|
| 1745 |
f"Relationship type: {html.escape(family_label)}",
|
| 1746 |
f"Presentation tier: {html.escape(_plain_status_label(status))}",
|
| 1747 |
html.escape(_plain_status_explainer(status)),
|
|
@@ -1832,6 +2230,7 @@ def build_app(copy_path: str | Path):
|
|
| 1832 |
data = load_release_data(copy_path)
|
| 1833 |
manifest = data["manifest"]
|
| 1834 |
events = data["events"]
|
|
|
|
| 1835 |
nodes = data["graph_nodes"]
|
| 1836 |
edges = data["graph_edges"]
|
| 1837 |
provenance = data["event_provenance"]
|
|
@@ -1888,7 +2287,7 @@ def build_app(copy_path: str | Path):
|
|
| 1888 |
valid_ids = {value for _, value in options}
|
| 1889 |
selected = relationship_id if relationship_id in valid_ids else (options[0][1] if options else None)
|
| 1890 |
export_note, export_csv, export_handoff_csv, export_pdf = _write_relationship_export_bundle(
|
| 1891 |
-
filtered_edges, selected or "", ranking_mode
|
| 1892 |
)
|
| 1893 |
return (
|
| 1894 |
_overview_summary_markdown(
|
|
@@ -1908,8 +2307,8 @@ def build_app(copy_path: str | Path):
|
|
| 1908 |
ranking_mode=ranking_mode,
|
| 1909 |
),
|
| 1910 |
gr.update(choices=options, value=selected),
|
| 1911 |
-
_relationship_detail_markdown(filtered_edges, selected or "", ranking_mode),
|
| 1912 |
-
_relationship_timeline_html(filtered_edges, selected or ""),
|
| 1913 |
export_note,
|
| 1914 |
export_csv,
|
| 1915 |
export_handoff_csv,
|
|
@@ -1926,11 +2325,11 @@ def build_app(copy_path: str | Path):
|
|
| 1926 |
):
|
| 1927 |
filtered_edges = _overview_edges(member_query, family, only_strong, int(top_n))
|
| 1928 |
export_note, export_csv, export_handoff_csv, export_pdf = _write_relationship_export_bundle(
|
| 1929 |
-
filtered_edges, relationship_id, ranking_mode
|
| 1930 |
)
|
| 1931 |
return (
|
| 1932 |
-
_relationship_detail_markdown(filtered_edges, relationship_id, ranking_mode),
|
| 1933 |
-
_relationship_timeline_html(filtered_edges, relationship_id),
|
| 1934 |
export_note,
|
| 1935 |
export_csv,
|
| 1936 |
export_handoff_csv,
|
|
|
|
| 899 |
)
|
| 900 |
|
| 901 |
|
| 902 |
+
TOPIC_AREA_PREFERRED_BILL_HINTS = {
|
| 903 |
+
"finance": ("billstatus-118hr2891.xml",),
|
| 904 |
+
}
|
| 905 |
+
|
| 906 |
+
|
| 907 |
+
def _relationship_target_key(value: Any) -> str:
|
| 908 |
+
normalized = re.sub(r"[^a-z0-9]+", "_", str(value or "").strip().lower()).strip("_")
|
| 909 |
+
return normalized
|
| 910 |
+
|
| 911 |
+
|
| 912 |
+
def _display_target_label(row: Dict[str, Any]) -> str:
|
| 913 |
+
label = str(row.get("target_label") or "").strip()
|
| 914 |
+
if str(row.get("relationship_family") or "") != "sector" or not label:
|
| 915 |
+
return label
|
| 916 |
+
words = re.sub(r"[_-]+", " ", label).strip()
|
| 917 |
+
return f"{words.title()} topic area"
|
| 918 |
+
|
| 919 |
+
|
| 920 |
+
def _plain_link_type(value: str) -> str:
|
| 921 |
+
normalized = str(value or "").strip().lower()
|
| 922 |
+
mapping = {
|
| 923 |
+
"trade_disclosure_to_sector": "Trade disclosure to topic-area mapping",
|
| 924 |
+
"annual_financial_disclosure_to_sector": "Annual financial disclosure to topic-area mapping",
|
| 925 |
+
"member_to_sector_profile": "Member profile or committee-context mapping",
|
| 926 |
+
"member_to_earmark_request": "Funding-recipient linkage",
|
| 927 |
+
"member_to_earmark_request_unresolved": "Funding-recipient linkage still needing more review",
|
| 928 |
+
}
|
| 929 |
+
return mapping.get(normalized, normalized.replace("_", " ").title() or "Released relationship row")
|
| 930 |
+
|
| 931 |
+
|
| 932 |
+
def _source_family_for_url(url: str) -> str:
|
| 933 |
+
normalized = str(url or "").strip().lower()
|
| 934 |
+
if "/financial-pdfs/" in normalized:
|
| 935 |
+
return "annual disclosure"
|
| 936 |
+
if "/ptr-pdfs/" in normalized:
|
| 937 |
+
return "trade disclosure"
|
| 938 |
+
if "committee_info" in normalized:
|
| 939 |
+
return "committee roster"
|
| 940 |
+
if "lda.senate.gov" in normalized:
|
| 941 |
+
return "lobbying activity"
|
| 942 |
+
if "govinfo.gov/bulkdata/billstatus" in normalized:
|
| 943 |
+
return "bill record"
|
| 944 |
+
if "/evs/" in normalized or "rollcall" in normalized:
|
| 945 |
+
return "vote activity"
|
| 946 |
+
if "usaspending.gov/award/" in normalized:
|
| 947 |
+
return "funding award"
|
| 948 |
+
if "memberdata.xml" in normalized or ".house.gov/" in normalized:
|
| 949 |
+
return "member profile"
|
| 950 |
+
return "published source support"
|
| 951 |
+
|
| 952 |
+
|
| 953 |
+
def _edge_evidence_chips(row: Dict[str, Any], url_values: list[str] | None = None) -> list[str]:
    """Return up to six distinct source-family labels for a relationship.

    Args:
        row: Relationship row; its pipe-delimited ``source_urls`` value
            (first 12 entries) is used when ``url_values`` is ``None``.
        url_values: Explicit URL list that takes precedence over the row.
            An empty list is honoured and produces no chips.

    Returns:
        Labels in first-seen order, at most six.
    """
    if url_values is None:
        candidate_urls = _split_pipe_values(row.get("source_urls", ""), limit=12)
    else:
        candidate_urls = url_values
    # dict.fromkeys keeps first-seen order while dropping repeated labels.
    distinct_families = dict.fromkeys(_source_family_for_url(item) for item in candidate_urls)
    return list(distinct_families)[:6]
|
| 961 |
+
|
| 962 |
+
|
| 963 |
+
def _relationship_constituents(
    links: pd.DataFrame,
    events: pd.DataFrame,
    row: Dict[str, Any],
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Select the link rows and event rows behind one relationship row.

    A row matches when its ``member_slug`` equals the relationship's member
    and its normalized target equals the relationship's target key (the
    row's ``target_key``, or the normalized ``target_label`` when absent).
    Links are also restricted by ``link_family`` when that column exists.
    Events are restricted by ``event_type``: ``sector_overlap_event`` for
    the ``sector`` family, ``recipient_overlap_event`` for any other.

    The target of a link/event is its ``relationship_target`` when that is
    non-blank, otherwise ``recipient_name`` or ``sector`` depending on the
    family.

    Returns:
        ``(link_rows, event_rows)`` as copies. Zero-row frames with the
        input columns are returned when ``row`` is empty or lacks a member
        slug, family, or target key.
    """
    no_links = links.head(0).copy()
    no_events = events.head(0).copy()
    if not row:
        return no_links, no_events

    slug = str(row.get("member_slug") or "").strip()
    family = str(row.get("relationship_family") or "").strip()
    wanted_key = str(row.get("target_key") or _relationship_target_key(row.get("target_label")))
    if not (slug and family and wanted_key):
        return no_links, no_events

    def text_column(frame: pd.DataFrame, name: str) -> pd.Series:
        # An absent column behaves like a column of empty strings.
        return frame.get(name, pd.Series("", index=frame.index)).fillna("").astype(str)

    matched_links = no_links
    if not links.empty:
        # ``member_slug`` is required here: plain indexing raises if it is missing.
        keep = links["member_slug"].fillna("").astype(str).eq(slug)
        if "link_family" in links.columns:
            keep = keep & links["link_family"].fillna("").astype(str).eq(family)
        explicit_target = text_column(links, "relationship_target")
        fallback_target = text_column(links, "recipient_name" if family == "recipient" else "sector")
        # Prefer the explicit target; fall back only where it is blank.
        link_target = explicit_target.where(explicit_target.str.strip() != "", fallback_target)
        keep = keep & link_target.map(_relationship_target_key).eq(wanted_key)
        matched_links = links[keep].copy()

    matched_events = no_events
    if not events.empty:
        is_sector = family == "sector"
        expected_type = "sector_overlap_event" if is_sector else "recipient_overlap_event"
        keep = events["member_slug"].fillna("").astype(str).eq(slug)
        keep = keep & events["event_type"].fillna("").astype(str).eq(expected_type)
        event_target = text_column(events, "sector" if is_sector else "recipient_name")
        if "relationship_target" in events.columns:
            explicit_target = text_column(events, "relationship_target")
            event_target = explicit_target.where(explicit_target.str.strip() != "", event_target)
        keep = keep & event_target.map(_relationship_target_key).eq(wanted_key)
        matched_events = events[keep].copy()

    return matched_links, matched_events
|
| 1007 |
+
|
| 1008 |
+
|
| 1009 |
+
def _collect_pipe_values(frame: pd.DataFrame, column: str, *, limit: int = 20) -> list[str]:
    """Gather the distinct pipe-delimited items found in one column.

    Args:
        frame: Rows to scan.
        column: Column holding pipe-delimited text; missing cells are
            treated as empty strings.
        limit: Per-cell cap forwarded to ``_split_pipe_values``. It does
            not cap the combined result.

    Returns:
        Items in first-seen order without repeats; ``[]`` when the frame
        is empty or the column is absent.
    """
    if frame.empty or column not in frame.columns:
        return []
    cell_values = frame[column].fillna("").tolist()
    # Dict keys give first-seen ordering and de-duplication in one pass.
    seen_in_order = dict.fromkeys(
        piece
        for cell in cell_values
        for piece in _split_pipe_values(cell, limit=limit)
    )
    return list(seen_in_order)
|
| 1018 |
+
|
| 1019 |
+
|
| 1020 |
+
def _relationship_reason_labels(link_rows: pd.DataFrame, event_rows: pd.DataFrame, row: Dict[str, Any]) -> list[str]:
    """Build the reader-facing reasons explaining why a relationship is listed.

    Reason codes are gathered from the constituent link rows, then the event
    rows. Only when those produce nothing are the relationship row's own
    ``reason_codes`` used. Each code is passed through ``_plain_reason_code``;
    empty and repeated labels are dropped and at most ten are returned.
    """
    constituent_codes = (
        _collect_pipe_values(link_rows, "reason_codes", limit=20)
        + _collect_pipe_values(event_rows, "reason_codes", limit=20)
    )
    plain_labels = [_plain_reason_code(code) for code in constituent_codes]
    if not plain_labels:
        # No constituent evidence: fall back to the codes on the row itself.
        row_codes = _split_pipe_values(row.get("reason_codes", ""), limit=20)
        plain_labels = [_plain_reason_code(code) for code in row_codes]

    unique_labels: list[str] = []
    for candidate in plain_labels:
        if not candidate or candidate in unique_labels:
            continue
        unique_labels.append(candidate)
    return unique_labels[:10]
|
| 1033 |
+
|
| 1034 |
+
|
| 1035 |
+
def _relationship_strengtheners(link_rows: pd.DataFrame, event_rows: pd.DataFrame) -> list[str]:
    """List what would strengthen a relationship, in reader-facing wording.

    ``missing_to_strengthen`` values are gathered from the link rows, then
    the event rows, and each is passed through ``_plain_strengthener``.
    Empty and repeated phrases are dropped; at most ten are returned.
    """
    gap_codes = (
        _collect_pipe_values(link_rows, "missing_to_strengthen", limit=20)
        + _collect_pipe_values(event_rows, "missing_to_strengthen", limit=20)
    )
    unique_phrases: list[str] = []
    for phrase in [_plain_strengthener(code) for code in gap_codes]:
        if not phrase or phrase in unique_phrases:
            continue
        unique_phrases.append(phrase)
    return unique_phrases[:10]
|
| 1046 |
+
|
| 1047 |
+
|
| 1048 |
+
def _relationship_sha_values(link_rows: pd.DataFrame, event_rows: pd.DataFrame) -> list[str]:
    """Return the distinct ``sha256_values`` entries behind a relationship.

    Values from the link rows come first, then those from the event rows.
    Empty entries are skipped and repeats keep their first position.
    """
    from_links = _collect_pipe_values(link_rows, "sha256_values", limit=40)
    from_events = _collect_pipe_values(event_rows, "sha256_values", limit=40)
    combined = from_links + from_events
    return list(dict.fromkeys(digest for digest in combined if digest))
|
| 1055 |
+
|
| 1056 |
+
|
| 1057 |
+
def _relationship_link_type_mix(link_rows: pd.DataFrame) -> list[str]:
    """Return the distinct reader-facing link types present in ``link_rows``.

    Each ``link_type`` value (missing treated as empty) is translated by
    ``_plain_link_type``. Labels are returned in first-seen order without
    repeats; ``[]`` when the frame is empty or lacks a ``link_type`` column.
    """
    if link_rows.empty or "link_type" not in link_rows.columns:
        return []
    raw_types = link_rows["link_type"].fillna("").astype(str).tolist()
    plain_labels = (_plain_link_type(raw_type) for raw_type in raw_types)
    return list(dict.fromkeys(label for label in plain_labels if label))
|
| 1066 |
+
|
| 1067 |
+
|
| 1068 |
+
def _source_record_priority(record: Dict[str, Any], target_label: str) -> tuple[Any, ...]:
    """Compute a sort key for a source record; smaller keys sort first.

    The key compares, in order:

    1. source family (annual disclosure first, unknown families last);
    2. position of a matching entry in ``TOPIC_AREA_PREFERRED_BILL_HINTS``
       for this target (bill and vote records only; 9 otherwise);
    3. score label (strong / linked before weak before anything else);
    4. the first reason code from a fixed ladder that the record carries;
    5. origin (``"event"`` before everything else);
    6. the URL itself, as a deterministic tie-breaker.
    """
    family = str(record.get("family") or "")
    url = str(record.get("url") or "")

    family_order = {
        "annual disclosure": 0,
        "trade disclosure": 1,
        "committee roster": 2,
        "lobbying activity": 3,
        "bill record": 4,
        "vote activity": 5,
        "funding award": 6,
        "member profile": 7,
        "published source support": 8,
    }
    family_rank = family_order.get(family, 9)

    score_order = {
        "strong_sector_overlap": 0,
        "earmark_recipient_linked": 0,
        "weak_sector_overlap": 1,
    }
    score_rank = score_order.get(str(record.get("score_label") or ""), 2)

    record_reasons = set(record.get("reason_codes") or [])
    # Checked top to bottom; the first code present decides the rank.
    reason_ladder = (
        ("issuer_match", 0),
        ("legislative_relevance_match", 0),
        ("bill_sponsor_overlap", 1),
        ("major_vote_overlap", 2),
    )
    reason_rank = next((rank for code, rank in reason_ladder if code in record_reasons), 3)

    target_key = _relationship_target_key(target_label)
    preferred_bill_rank = 9
    if family in {"bill record", "vote activity"}:
        lowered_url = url.lower()
        hints = TOPIC_AREA_PREFERRED_BILL_HINTS.get(target_key, ())
        preferred_bill_rank = next(
            (position for position, hint in enumerate(hints) if hint in lowered_url),
            9,
        )

    origin_rank = 1
    if str(record.get("origin") or "") == "event":
        origin_rank = 0

    return (
        family_rank,
        preferred_bill_rank,
        score_rank,
        reason_rank,
        origin_rank,
        url,
    )
|
| 1114 |
+
|
| 1115 |
+
|
| 1116 |
+
def _relationship_source_records(link_rows: pd.DataFrame, event_rows: pd.DataFrame, target_label: str) -> list[Dict[str, Any]]:
    """Expand constituent rows into one best source record per distinct URL.

    Every URL in a row's ``source_urls`` (first 24 per row) becomes a
    candidate carrying the URL, its source family, its origin (``"link"``
    or ``"event"``), and the row's ``score_label`` and reason-code set.
    Candidates are ordered by ``_source_record_priority`` and the
    best-ranked candidate is kept for each URL.

    Returns:
        The retained records in priority order.
    """
    candidates: list[Dict[str, Any]] = []
    frames_by_origin = {"link": link_rows, "event": event_rows}
    for origin, frame in frames_by_origin.items():
        if frame.empty:
            continue
        for source_row in frame.to_dict("records"):
            row_reasons = set(_split_pipe_values(source_row.get("reason_codes", ""), limit=20))
            row_score_label = str(source_row.get("score_label") or "")
            row_urls = _split_pipe_values(source_row.get("source_urls", ""), limit=24)
            candidates.extend(
                {
                    "url": url,
                    "family": _source_family_for_url(url),
                    "origin": origin,
                    "score_label": row_score_label,
                    "reason_codes": row_reasons,
                }
                for url in row_urls
            )

    # Stable sort, so equal-priority candidates keep their insertion order.
    candidates.sort(key=lambda item: _source_record_priority(item, target_label))

    best_by_url: Dict[str, Dict[str, Any]] = {}
    for candidate in candidates:
        url_key = str(candidate.get("url") or "")
        if url_key not in best_by_url:
            best_by_url[url_key] = candidate
    return list(best_by_url.values())
|
| 1138 |
+
|
| 1139 |
+
|
| 1140 |
+
def _select_example_urls(
    row: Dict[str, Any],
    link_rows: pd.DataFrame,
    event_rows: pd.DataFrame,
    *,
    limit: int = 6,
) -> list[str]:
    """Choose the example source URLs to surface for one relationship row.

    Selection happens in two passes over the deduplicated source records for
    the row's ``target_label``:

    1. Reserved picks, at most one URL each: "annual disclosure", then
       "trade disclosure", then "committee roster" (only when the combined
       reason codes of both frames contain ``committee_jurisdiction_match``),
       then "lobbying activity".
    2. Remaining records, ordered by a fixed family ranking and then by
       ``_source_record_priority``, are appended until ``limit`` is reached.

    The result is finally truncated to ``limit`` entries.
    """
    target_label = str(row.get("target_label") or "")
    records = _relationship_source_records(link_rows, event_rows, target_label)
    picked: list[str] = []
    picked_lookup: set[str] = set()

    def priority(item: Dict[str, Any]) -> Any:
        return _source_record_priority(item, target_label)

    def choose_one(family_name: str) -> None:
        # Take the best-ranked, not-yet-picked record of the given family, if any.
        pool = [
            candidate
            for candidate in records
            if str(candidate.get("family") or "") == family_name
            and str(candidate.get("url") or "") not in picked_lookup
        ]
        if pool:
            best_url = str(min(pool, key=priority).get("url") or "")
            picked.append(best_url)
            picked_lookup.add(best_url)

    for reserved_family in ("annual disclosure", "trade disclosure"):
        choose_one(reserved_family)

    link_codes = _collect_pipe_values(link_rows, "reason_codes", limit=20)
    event_codes = _collect_pipe_values(event_rows, "reason_codes", limit=20)
    if "committee_jurisdiction_match" in set(link_codes + event_codes):
        choose_one("committee roster")
    # choose_one is a no-op when no lobbying record is available.
    choose_one("lobbying activity")

    # Families not listed here share rank 9 and sort last.
    fallback_rank = {
        family_name: position
        for position, family_name in enumerate(
            (
                "bill record",
                "vote activity",
                "funding award",
                "committee roster",
                "member profile",
                "published source support",
                "annual disclosure",
                "trade disclosure",
                "lobbying activity",
            )
        )
    }
    leftovers = [
        candidate
        for candidate in records
        if str(candidate.get("url") or "") not in picked_lookup
    ]
    leftovers.sort(
        key=lambda item: (
            fallback_rank.get(str(item.get("family") or ""), 9),
            priority(item),
        )
    )

    max_urls = int(limit)
    for candidate in leftovers:
        url = str(candidate.get("url") or "")
        if url and url not in picked_lookup:
            picked.append(url)
            picked_lookup.add(url)
            if len(picked) >= max_urls:
                break
    return picked[:max_urls]
|
| 1204 |
+
|
| 1205 |
+
|
| 1206 |
+
def _relationship_context(
    edges: pd.DataFrame,
    links: pd.DataFrame,
    events: pd.DataFrame,
    relationship_id: str,
    ranking_mode: str,
) -> Dict[str, Any] | None:
    """Assemble the shared context for one relationship.

    Returns ``None`` when ``_select_edge_row`` finds no row for
    ``relationship_id``.  Otherwise returns a dict with the edge row, its raw
    and relative scores, the score to display (the relative score only when
    ``ranking_mode`` normalizes to ``"relative"``), the surfaced example URLs,
    every non-empty source URL in priority order, reason labels,
    strengtheners, SHA values and their count, evidence chips, the link-type
    mix, the display label for the target, and a topic-area note that is
    non-empty only for the ``"sector"`` relationship family.
    """
    row = _select_edge_row(edges, relationship_id)
    if not row:
        return None

    target_label = str(row.get("target_label") or "")

    def by_priority(item: Dict[str, Any]) -> Any:
        return _source_record_priority(item, target_label)

    link_rows, event_rows = _relationship_constituents(links, events, row)

    raw_score = _relationship_score(row)
    baselines = _member_activity_baselines(edges)
    relative_score = _relative_relationship_score(row, baselines)
    mode = str(ranking_mode or "raw").strip().lower()
    if mode == "relative":
        display_score = relative_score
    else:
        display_score = raw_score

    surfaced_urls = _select_example_urls(row, link_rows, event_rows, limit=6)

    ranked_records = sorted(
        _relationship_source_records(link_rows, event_rows, target_label),
        key=by_priority,
    )
    # Keep every non-empty URL, not just the surfaced examples.
    all_urls = [
        url
        for url in (record.get("url", "") for record in ranked_records)
        if url
    ]

    reason_labels = _relationship_reason_labels(link_rows, event_rows, row)
    strengtheners = _relationship_strengtheners(link_rows, event_rows)
    sha_values = _relationship_sha_values(link_rows, event_rows)
    evidence_chips = _edge_evidence_chips(row, surfaced_urls)
    link_type_mix = _relationship_link_type_mix(link_rows)

    if str(row.get("relationship_family") or "") == "sector":
        topic_area_note = "Topic-area links combine several public-record signals. They do not claim that every supporting bill is narrowly about that sector."
    else:
        topic_area_note = ""

    context: Dict[str, Any] = {
        "row": row,
        "raw_score": raw_score,
        "relative_score": relative_score,
        "display_score": display_score,
        "surfaced_urls": surfaced_urls,
        "all_urls": all_urls,
        "reason_labels": reason_labels,
        "strengtheners": strengtheners,
        "sha_values": sha_values,
        "integrity_count": len(sha_values),
        "evidence_chips": evidence_chips,
        "link_type_mix": link_type_mix,
        "display_target_label": _display_target_label(row),
        "topic_area_note": topic_area_note,
    }
    return context
|
| 1249 |
|
| 1250 |
|
| 1251 |
def _window_overlap_text(row: Dict[str, Any]) -> str:
|
|
|
|
| 1356 |
{
|
| 1357 |
"relationship_id": str(row.get("edge_id") or ""),
|
| 1358 |
"member": str(row.get("member_name") or row.get("member_slug") or ""),
|
| 1359 |
+
"counterparty / sector": _display_target_label(row),
|
| 1360 |
"overall score": display_score,
|
| 1361 |
"raw score": raw_score,
|
| 1362 |
"relative score": relative_score,
|
|
|
|
| 1510 |
return matched.head(1).to_dict("records")[0]
|
| 1511 |
|
| 1512 |
|
| 1513 |
+
def _relationship_detail_markdown(
|
| 1514 |
+
edges: pd.DataFrame,
|
| 1515 |
+
links: pd.DataFrame,
|
| 1516 |
+
events: pd.DataFrame,
|
| 1517 |
+
relationship_id: str,
|
| 1518 |
+
ranking_mode: str = "raw",
|
| 1519 |
+
) -> str:
|
| 1520 |
+
context = _relationship_context(edges, links, events, relationship_id, ranking_mode)
|
| 1521 |
+
if not context:
|
| 1522 |
return "Select a relationship to inspect why it appears in this released slice."
|
| 1523 |
+
row = context["row"]
|
| 1524 |
family = str(row.get("relationship_family", "") or "")
|
| 1525 |
+
chips = context["evidence_chips"]
|
| 1526 |
+
reason_codes = context["reason_labels"]
|
| 1527 |
+
urls = context["surfaced_urls"]
|
| 1528 |
+
raw_score = int(context["raw_score"])
|
| 1529 |
+
relative_score = int(context["relative_score"])
|
| 1530 |
+
display_score = int(context["display_score"])
|
| 1531 |
lines = [
|
| 1532 |
+
f"### {row.get('member_name') or row.get('member_slug')} -> {context['display_target_label']}",
|
| 1533 |
"",
|
| 1534 |
f"- Relationship view: `{_plain_family_label(family)}`",
|
| 1535 |
f"- Strength label: `{_plain_status_label(str(row.get('relationship_status', '') or ''))}`",
|
|
|
|
| 1539 |
f"- Supporting relationship rows: `{int(row.get('link_count', 0) or 0)}`",
|
| 1540 |
f"- Stronger-support rows: `{int(row.get('linked_count', 0) or 0) if family == 'recipient' else int(row.get('strong_event_count', 0) or 0)}`",
|
| 1541 |
f"- Caution / weaker rows: `{int(row.get('review_count', 0) or 0) if family == 'recipient' else int(row.get('weak_event_count', 0) or 0)}`",
|
| 1542 |
+
f"- Integrity-checked source records attached: `{int(context['integrity_count'])}`",
|
| 1543 |
f"- Unresolved source refs still counted: `{int(row.get('unresolved_source_ref_count', 0) or 0)}`",
|
| 1544 |
f"- Evidence signals: `{', '.join(chips) if chips else 'published source support'}`",
|
| 1545 |
f"- Time-window overlap: `{_window_overlap_text(row)}`",
|
| 1546 |
]
|
| 1547 |
+
if context["link_type_mix"]:
|
| 1548 |
+
lines.append(f"- Released row kinds involved: `{'; '.join(context['link_type_mix'])}`")
|
| 1549 |
+
if context["topic_area_note"]:
|
| 1550 |
+
lines.append(f"- Topic-area note: {context['topic_area_note']}")
|
| 1551 |
if reason_codes:
|
| 1552 |
lines.extend(["", "#### Why it is linked in this slice", ""])
|
| 1553 |
lines.extend(f"- {item}" for item in reason_codes)
|
| 1554 |
+
lines.append("- Note: one released row can contribute multiple signals, so the signal list can be longer than the supporting-row count.")
|
| 1555 |
+
if context["strengtheners"]:
|
| 1556 |
+
lines.extend(["", "#### What would strengthen it", ""])
|
| 1557 |
+
lines.extend(f"- {item}" for item in context["strengtheners"])
|
| 1558 |
if urls:
|
| 1559 |
lines.extend(["", "#### Example published source URLs", ""])
|
| 1560 |
lines.extend(f"- [{item}]({item})" for item in urls)
|
|
|
|
| 1564 |
"#### Integrity note",
|
| 1565 |
"",
|
| 1566 |
"- `Integrity-checked` means the release includes a cryptographic fingerprint to help show a published record has not been altered.",
|
| 1567 |
+
"",
|
| 1568 |
+
"#### Ranking note",
|
| 1569 |
+
"",
|
| 1570 |
+
"- Raw score is the default public ranking. The relative score is experimental and changes with the current filtered comparison set.",
|
| 1571 |
]
|
| 1572 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1573 |
return "\n".join(lines)
|
| 1574 |
|
| 1575 |
|
|
|
|
| 1578 |
return slug or "relationship-export"
|
| 1579 |
|
| 1580 |
|
| 1581 |
+
def _relationship_export_rows(
|
| 1582 |
+
edges: pd.DataFrame,
|
| 1583 |
+
links: pd.DataFrame,
|
| 1584 |
+
events: pd.DataFrame,
|
| 1585 |
+
relationship_id: str,
|
| 1586 |
+
ranking_mode: str,
|
| 1587 |
+
) -> list[dict[str, Any]]:
|
| 1588 |
+
context = _relationship_context(edges, links, events, relationship_id, ranking_mode)
|
| 1589 |
+
if not context:
|
| 1590 |
return []
|
| 1591 |
+
row = context["row"]
|
| 1592 |
+
raw_score = int(context["raw_score"])
|
| 1593 |
+
relative_score = int(context["relative_score"])
|
| 1594 |
+
display_score = int(context["display_score"])
|
|
|
|
| 1595 |
export_rows: list[dict[str, Any]] = [
|
| 1596 |
{
|
| 1597 |
"relationship_id": str(row.get("edge_id") or ""),
|
|
|
|
| 1608 |
"item_detail": "Top-level relationship summary for export.",
|
| 1609 |
}
|
| 1610 |
]
|
| 1611 |
+
for row_kind in context["link_type_mix"]:
|
| 1612 |
+
export_rows.append(
|
| 1613 |
+
{
|
| 1614 |
+
"relationship_id": str(row.get("edge_id") or ""),
|
| 1615 |
+
"member_name": str(row.get("member_name") or row.get("member_slug") or ""),
|
| 1616 |
+
"target_label": str(row.get("target_label") or ""),
|
| 1617 |
+
"relationship_family": _plain_family_label(str(row.get("relationship_family", "") or "")),
|
| 1618 |
+
"strength_label": _plain_status_label(str(row.get("relationship_status", "") or "")),
|
| 1619 |
+
"ranking_mode": str(ranking_mode or "raw"),
|
| 1620 |
+
"displayed_score": display_score,
|
| 1621 |
+
"raw_score": raw_score,
|
| 1622 |
+
"relative_score": relative_score,
|
| 1623 |
+
"item_type": "released_row_kind",
|
| 1624 |
+
"item_label": "Released row kind",
|
| 1625 |
+
"item_detail": row_kind,
|
| 1626 |
+
}
|
| 1627 |
+
)
|
| 1628 |
+
for chip in context["evidence_chips"]:
|
| 1629 |
export_rows.append(
|
| 1630 |
{
|
| 1631 |
"relationship_id": str(row.get("edge_id") or ""),
|
|
|
|
| 1642 |
"item_detail": _evidence_chip_help(chip),
|
| 1643 |
}
|
| 1644 |
)
|
| 1645 |
+
for reason in context["reason_labels"]:
|
| 1646 |
export_rows.append(
|
| 1647 |
{
|
| 1648 |
"relationship_id": str(row.get("edge_id") or ""),
|
|
|
|
| 1659 |
"item_detail": reason,
|
| 1660 |
}
|
| 1661 |
)
|
| 1662 |
+
for item in context["strengtheners"]:
|
| 1663 |
export_rows.append(
|
| 1664 |
{
|
| 1665 |
"relationship_id": str(row.get("edge_id") or ""),
|
|
|
|
| 1676 |
"item_detail": item,
|
| 1677 |
}
|
| 1678 |
)
|
| 1679 |
+
for url in context["all_urls"]:
|
| 1680 |
export_rows.append(
|
| 1681 |
{
|
| 1682 |
"relationship_id": str(row.get("edge_id") or ""),
|
|
|
|
| 1696 |
return export_rows
|
| 1697 |
|
| 1698 |
|
| 1699 |
+
def _relationship_handoff_rows(
|
| 1700 |
+
edges: pd.DataFrame,
|
| 1701 |
+
links: pd.DataFrame,
|
| 1702 |
+
events: pd.DataFrame,
|
| 1703 |
+
relationship_id: str,
|
| 1704 |
+
ranking_mode: str,
|
| 1705 |
+
) -> list[dict[str, Any]]:
|
| 1706 |
+
context = _relationship_context(edges, links, events, relationship_id, ranking_mode)
|
| 1707 |
+
if not context:
|
| 1708 |
return []
|
| 1709 |
+
row = context["row"]
|
| 1710 |
family = str(row.get("relationship_family", "") or "")
|
| 1711 |
+
raw_score = int(context["raw_score"])
|
| 1712 |
+
relative_score = int(context["relative_score"])
|
| 1713 |
+
display_score = int(context["display_score"])
|
| 1714 |
handoff_rows: list[dict[str, Any]] = []
|
| 1715 |
|
| 1716 |
def add(section: str, label: str, explanation: str, source_url: str = "") -> None:
|
|
|
|
| 1724 |
)
|
| 1725 |
|
| 1726 |
add("Summary", "Member", str(row.get("member_name") or row.get("member_slug") or ""))
|
| 1727 |
+
add("Summary", "Target", context["display_target_label"])
|
| 1728 |
add("Summary", "Relationship view", _plain_family_label(family))
|
| 1729 |
add("Summary", "Strength label", _plain_status_label(str(row.get("relationship_status", "") or "")))
|
| 1730 |
add("Summary", "Displayed score", str(display_score))
|
| 1731 |
add("Summary", "Raw score", str(raw_score))
|
| 1732 |
add("Summary", "Relative-to-baseline score (experimental)", str(relative_score))
|
| 1733 |
+
add(
|
| 1734 |
+
"Summary",
|
| 1735 |
+
"Relative score note",
|
| 1736 |
+
"Raw score is the default public ranking. Relative score is experimental and changes with the current filtered comparison set.",
|
| 1737 |
+
)
|
| 1738 |
add("Summary", "Supporting relationship rows", str(int(row.get("link_count", 0) or 0)))
|
| 1739 |
add("Summary", "Stronger-support rows", str(_stronger_support_count(row)))
|
| 1740 |
add("Summary", "Caution / weaker rows", str(int(row.get("review_count", 0) or 0) if family == "recipient" else int(row.get("weak_event_count", 0) or 0)))
|
| 1741 |
+
add("Summary", "Integrity-checked source records attached", str(int(context["integrity_count"])))
|
| 1742 |
add("Summary", "Unresolved source refs still counted", str(int(row.get("unresolved_source_ref_count", 0) or 0)))
|
| 1743 |
add("Summary", "Evidence window", _window_overlap_text(row))
|
| 1744 |
+
if context["topic_area_note"]:
|
| 1745 |
+
add("Summary", "Topic-area note", context["topic_area_note"])
|
| 1746 |
+
for row_kind in context["link_type_mix"]:
|
| 1747 |
+
add("Summary", "Released row kind", row_kind)
|
| 1748 |
|
| 1749 |
+
for chip in context["evidence_chips"]:
|
| 1750 |
add("Evidence signals", chip.title(), _evidence_chip_help(chip))
|
| 1751 |
+
add(
|
| 1752 |
+
"Why this link appears",
|
| 1753 |
+
"Signal-count note",
|
| 1754 |
+
"One released row can contribute multiple signals, so the signal list can be longer than the supporting-row count.",
|
| 1755 |
+
)
|
| 1756 |
+
for reason in context["reason_labels"]:
|
| 1757 |
add("Why this link appears", reason, reason)
|
| 1758 |
+
for item in context["strengtheners"]:
|
| 1759 |
add("What would strengthen it", "Needs stronger support", item)
|
| 1760 |
+
for url in context["surfaced_urls"]:
|
| 1761 |
add("Published source URLs", urlparse(url).netloc or "Published source URL", "Open this published record directly.", url)
|
| 1762 |
return handoff_rows
|
| 1763 |
|
| 1764 |
|
| 1765 |
def _write_relationship_export_bundle(
|
| 1766 |
+
edges: pd.DataFrame,
|
| 1767 |
+
links: pd.DataFrame,
|
| 1768 |
+
events: pd.DataFrame,
|
| 1769 |
+
relationship_id: str,
|
| 1770 |
+
ranking_mode: str,
|
| 1771 |
) -> tuple[str, str | None, str | None, str | None]:
|
| 1772 |
+
export_rows = _relationship_export_rows(edges, links, events, relationship_id, ranking_mode)
|
| 1773 |
if not export_rows:
|
| 1774 |
return "Pick one relationship to generate exportable evidence files.", None, None, None
|
| 1775 |
relationship_id_value = str(export_rows[0]["relationship_id"] or relationship_id)
|
|
|
|
| 1800 |
for export_row in export_rows:
|
| 1801 |
writer.writerow({name: export_row.get(name, "") for name in fieldnames})
|
| 1802 |
|
| 1803 |
+
handoff_rows = _relationship_handoff_rows(edges, links, events, relationship_id, ranking_mode)
|
| 1804 |
handoff_fieldnames = ["section", "label", "explanation", "source_url"]
|
| 1805 |
with handoff_csv_path.open("w", encoding="utf-8", newline="") as handle:
|
| 1806 |
writer = csv.DictWriter(handle, fieldnames=handoff_fieldnames)
|
|
|
|
| 1808 |
for export_row in handoff_rows:
|
| 1809 |
writer.writerow({name: export_row.get(name, "") for name in handoff_fieldnames})
|
| 1810 |
|
| 1811 |
+
context = _relationship_context(edges, links, events, relationship_id, ranking_mode)
|
| 1812 |
+
title = f"{export_rows[0]['member_name']} -> {context['display_target_label'] if context else export_rows[0]['target_label']}"
|
| 1813 |
pdf = canvas.Canvas(str(pdf_path), pagesize=LETTER, invariant=1)
|
| 1814 |
width, height = LETTER
|
| 1815 |
left = 54
|
|
|
|
| 1868 |
return (60, "Published source", urlparse(normalized).netloc if normalized.startswith("http") else "Published source")
|
| 1869 |
|
| 1870 |
|
| 1871 |
+
def _relationship_timeline_html(
|
| 1872 |
+
edges: pd.DataFrame,
|
| 1873 |
+
links: pd.DataFrame,
|
| 1874 |
+
events: pd.DataFrame,
|
| 1875 |
+
relationship_id: str,
|
| 1876 |
+
ranking_mode: str = "raw",
|
| 1877 |
+
) -> str:
|
| 1878 |
+
context = _relationship_context(edges, links, events, relationship_id, ranking_mode)
|
| 1879 |
+
if not context:
|
| 1880 |
return "<div style=\"padding: 1rem; border: 1px solid #d6d0c4; background: #fffdf8; color: #3a3a3a;\">Choose a relationship to see its evidence window.</div>"
|
| 1881 |
+
row = context["row"]
|
| 1882 |
entries: list[tuple[int, str, str, str]] = []
|
| 1883 |
seen: set[tuple[str, str, str]] = set()
|
| 1884 |
+
for url in context["surfaced_urls"]:
|
| 1885 |
sort_key, window_label, track_label = _timeline_window_from_url(url)
|
| 1886 |
detail = url
|
| 1887 |
dedupe_key = (window_label, track_label, detail)
|
| 1888 |
if dedupe_key not in seen:
|
| 1889 |
seen.add(dedupe_key)
|
| 1890 |
entries.append((sort_key, window_label, track_label, detail))
|
| 1891 |
+
if any("Member profile" in item for item in context.get("link_type_mix", [])):
|
| 1892 |
entries.append((70, "Undated support", "Member profile support", "Profile-based support is included in this relationship summary."))
|
| 1893 |
if int(row.get("unresolved_source_ref_count", 0) or 0) > 0:
|
| 1894 |
entries.append((80, "Partly unresolved", "Some official references remain unresolved", f"{int(row.get('unresolved_source_ref_count', 0) or 0)} unresolved refs are still counted in this released row."))
|
|
|
|
| 2001 |
if score_label != "all":
|
| 2002 |
filtered = filtered[filtered["score_labels"].fillna("").str.contains(score_label, case=False, na=False)]
|
| 2003 |
if review_status == "stronger":
|
| 2004 |
+
stronger_mask = (
|
| 2005 |
+
((filtered["relationship_family"] == "recipient") & (filtered["relationship_status"] == "linked"))
|
| 2006 |
+
| (
|
| 2007 |
+
(filtered["relationship_family"] == "sector")
|
| 2008 |
+
& (filtered["relationship_status"] == "release_ok")
|
| 2009 |
+
& (filtered["strong_event_count"].fillna(0).astype(int) > 0)
|
| 2010 |
+
)
|
| 2011 |
+
)
|
| 2012 |
+
filtered = filtered[stronger_mask]
|
| 2013 |
elif review_status != "all":
|
| 2014 |
filtered = filtered[filtered["relationship_status"] == review_status]
|
| 2015 |
if hide_unresolved_only:
|
|
|
|
| 2103 |
if not node:
|
| 2104 |
continue
|
| 2105 |
node_type = str(node.get("node_type", ""))
|
| 2106 |
+
display_label = str(node.get("label", "") or "")
|
| 2107 |
+
if node_type == "sector":
|
| 2108 |
+
display_label = f"{re.sub(r'[_-]+', ' ', display_label).strip().title()} topic area"
|
| 2109 |
+
title_lines = [f"<b>{html.escape(display_label)}</b>"]
|
| 2110 |
role_label = {
|
| 2111 |
"member": "House member",
|
| 2112 |
"recipient": "Funding recipient",
|
| 2113 |
+
"sector": "Topic area",
|
| 2114 |
}.get(node_type, node_type.title())
|
| 2115 |
title_lines.append(f"Role: {html.escape(role_label)}")
|
| 2116 |
if node_type == "member":
|
|
|
|
| 2121 |
title_lines.append(f"Released relationships in graph data: {int(node.get('connected_edge_count', 0) or 0)}")
|
| 2122 |
network.add_node(
|
| 2123 |
node_id,
|
| 2124 |
+
label=display_label,
|
| 2125 |
title="<br>".join(title_lines),
|
| 2126 |
color=color_map.get(str(node.get("node_type", "")), "#6e6e6e"),
|
| 2127 |
shape="dot",
|
|
|
|
| 2139 |
if ":" in part
|
| 2140 |
]
|
| 2141 |
title_lines = [
|
| 2142 |
+
f"<b>{html.escape(str(row.get('member_name', '') or row.get('member_slug', '')))} -> {html.escape(_display_target_label(row))}</b>",
|
| 2143 |
f"Relationship type: {html.escape(family_label)}",
|
| 2144 |
f"Presentation tier: {html.escape(_plain_status_label(status))}",
|
| 2145 |
html.escape(_plain_status_explainer(status)),
|
|
|
|
| 2230 |
data = load_release_data(copy_path)
|
| 2231 |
manifest = data["manifest"]
|
| 2232 |
events = data["events"]
|
| 2233 |
+
links = data["links"]
|
| 2234 |
nodes = data["graph_nodes"]
|
| 2235 |
edges = data["graph_edges"]
|
| 2236 |
provenance = data["event_provenance"]
|
|
|
|
| 2287 |
valid_ids = {value for _, value in options}
|
| 2288 |
selected = relationship_id if relationship_id in valid_ids else (options[0][1] if options else None)
|
| 2289 |
export_note, export_csv, export_handoff_csv, export_pdf = _write_relationship_export_bundle(
|
| 2290 |
+
filtered_edges, links, events, selected or "", ranking_mode
|
| 2291 |
)
|
| 2292 |
return (
|
| 2293 |
_overview_summary_markdown(
|
|
|
|
| 2307 |
ranking_mode=ranking_mode,
|
| 2308 |
),
|
| 2309 |
gr.update(choices=options, value=selected),
|
| 2310 |
+
_relationship_detail_markdown(filtered_edges, links, events, selected or "", ranking_mode),
|
| 2311 |
+
_relationship_timeline_html(filtered_edges, links, events, selected or "", ranking_mode),
|
| 2312 |
export_note,
|
| 2313 |
export_csv,
|
| 2314 |
export_handoff_csv,
|
|
|
|
| 2325 |
):
|
| 2326 |
filtered_edges = _overview_edges(member_query, family, only_strong, int(top_n))
|
| 2327 |
export_note, export_csv, export_handoff_csv, export_pdf = _write_relationship_export_bundle(
|
| 2328 |
+
filtered_edges, links, events, relationship_id, ranking_mode
|
| 2329 |
)
|
| 2330 |
return (
|
| 2331 |
+
_relationship_detail_markdown(filtered_edges, links, events, relationship_id, ranking_mode),
|
| 2332 |
+
_relationship_timeline_html(filtered_edges, links, events, relationship_id, ranking_mode),
|
| 2333 |
export_note,
|
| 2334 |
export_csv,
|
| 2335 |
export_handoff_csv,
|