Polish example URLs, relative wording, and export names
Browse files
dataset_bundle/evidence_audit/consistency_report.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"generated_at": "2026-04-
|
| 3 |
"event_provenance": {
|
| 4 |
"event_count": 3918,
|
| 5 |
"events_with_artifacts": 3878,
|
|
|
|
| 1 |
{
|
| 2 |
+
"generated_at": "2026-04-19T21:24:27-04:00",
|
| 3 |
"event_provenance": {
|
| 4 |
"event_count": 3918,
|
| 5 |
"events_with_artifacts": 3878,
|
dataset_bundle/public_release_manifest.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"public_version": "congress-public-records-slice-2026-04-v1",
|
| 3 |
"title": "Congress Public Records Slice",
|
| 4 |
-
"release_date": "2026-04-
|
| 5 |
"slice_description": "A neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.",
|
| 6 |
"source_run_name": "house_all_baseline_20260418_v21_recipienthardening",
|
| 7 |
"dataset_repo_id": "cjc0013/cmp-data",
|
|
|
|
| 1 |
{
|
| 2 |
"public_version": "congress-public-records-slice-2026-04-v1",
|
| 3 |
"title": "Congress Public Records Slice",
|
| 4 |
+
"release_date": "2026-04-19T21:25:32-04:00",
|
| 5 |
"slice_description": "A neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.",
|
| 6 |
"source_run_name": "house_all_baseline_20260418_v21_recipienthardening",
|
| 7 |
"dataset_repo_id": "cjc0013/cmp-data",
|
public_space_app.py
CHANGED
|
@@ -1069,6 +1069,15 @@ def _relative_bucket(value: int) -> str:
|
|
| 1069 |
return "near this member's baseline"
|
| 1070 |
|
| 1071 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1072 |
def _relationship_strengtheners(link_rows: pd.DataFrame, event_rows: pd.DataFrame) -> list[str]:
|
| 1073 |
labels = [
|
| 1074 |
_plain_strengthener(item)
|
|
@@ -1184,6 +1193,7 @@ def _select_example_urls(
|
|
| 1184 |
records = _relationship_source_records(link_rows, event_rows, str(row.get("target_label") or ""))
|
| 1185 |
selected: list[str] = []
|
| 1186 |
selected_set: set[str] = set()
|
|
|
|
| 1187 |
|
| 1188 |
def choose_one(family_name: str) -> None:
|
| 1189 |
candidates = [
|
|
@@ -1207,10 +1217,39 @@ def _select_example_urls(
|
|
| 1207 |
if any(str(record.get("family") or "") == "lobbying activity" for record in records):
|
| 1208 |
choose_one("lobbying activity")
|
| 1209 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1210 |
fallback_records = [
|
| 1211 |
record
|
| 1212 |
for record in records
|
| 1213 |
if str(record.get("url") or "") not in selected_set
|
|
|
|
| 1214 |
]
|
| 1215 |
family_order = {
|
| 1216 |
"bill record": 0,
|
|
@@ -1501,8 +1540,8 @@ def _overview_cards_html(
|
|
| 1501 |
relative_view = str(row.get("relative view", "") or "")
|
| 1502 |
score_note = _score_help_text(ranking_mode)
|
| 1503 |
ranking_mode_note = (
|
| 1504 |
-
f"<div class=\"result-note\"><strong>Experimental relative ordering:</strong>
|
| 1505 |
-
f"
|
| 1506 |
if str(ranking_mode or "raw").strip().lower() == "relative"
|
| 1507 |
else ""
|
| 1508 |
)
|
|
@@ -1529,7 +1568,7 @@ def _overview_cards_html(
|
|
| 1529 |
<div><strong>Needs caution</strong>{needs_caution}</div>
|
| 1530 |
<div><strong>Unresolved refs</strong>{unresolved_refs}</div>
|
| 1531 |
<div><strong>Raw score</strong>{raw_score}</div>
|
| 1532 |
-
<div><strong>
|
| 1533 |
</div>
|
| 1534 |
<div class="result-hint">Use Explain this link below to open the detailed breakdown and export files for this relationship.</div>
|
| 1535 |
</div>
|
|
@@ -1588,7 +1627,7 @@ def _relationship_detail_markdown(
|
|
| 1588 |
f"- Public score shown on the card: `{raw_score}`",
|
| 1589 |
f"- Raw score: `{raw_score}`",
|
| 1590 |
f"- Relative-to-baseline score (experimental): `{relative_score}`",
|
| 1591 |
-
f"-
|
| 1592 |
f"- Supporting relationship rows: `{int(row.get('link_count', 0) or 0)}`",
|
| 1593 |
f"- Stronger-support rows: `{int(row.get('linked_count', 0) or 0) if family == 'recipient' else int(row.get('strong_event_count', 0) or 0)}`",
|
| 1594 |
f"- Caution / weaker rows: `{int(row.get('review_count', 0) or 0) if family == 'recipient' else int(row.get('weak_event_count', 0) or 0)}`",
|
|
@@ -1635,6 +1674,13 @@ def _safe_export_stem(value: str) -> str:
|
|
| 1635 |
return slug or "relationship-export"
|
| 1636 |
|
| 1637 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1638 |
def _relationship_export_rows(
|
| 1639 |
edges: pd.DataFrame,
|
| 1640 |
links: pd.DataFrame,
|
|
@@ -1832,10 +1878,10 @@ def _write_relationship_export_bundle(
|
|
| 1832 |
relationship_id_value = str(export_rows[0]["relationship_id"] or relationship_id)
|
| 1833 |
export_dir = Path(tempfile.gettempdir()) / "cmp_space_exports"
|
| 1834 |
export_dir.mkdir(parents=True, exist_ok=True)
|
| 1835 |
-
stem =
|
| 1836 |
-
csv_path = export_dir / f"{stem}-
|
| 1837 |
handoff_csv_path = export_dir / f"{stem}-handoff.csv"
|
| 1838 |
-
pdf_path = export_dir / f"{stem}.pdf"
|
| 1839 |
|
| 1840 |
fieldnames = [
|
| 1841 |
"relationship_id",
|
|
@@ -1900,7 +1946,7 @@ def _write_relationship_export_bundle(
|
|
| 1900 |
pdf.save()
|
| 1901 |
note = (
|
| 1902 |
f"Prepared deterministic export files for `{relationship_id_value}`. "
|
| 1903 |
-
"Use the
|
| 1904 |
)
|
| 1905 |
return note, str(csv_path), str(handoff_csv_path), str(pdf_path)
|
| 1906 |
|
|
|
|
| 1069 |
return "near this member's baseline"
|
| 1070 |
|
| 1071 |
|
| 1072 |
+
def _relative_view_explainer(value: int) -> str:
|
| 1073 |
+
score = int(value or 0)
|
| 1074 |
+
if score >= 70:
|
| 1075 |
+
return "In this filtered view, this relationship looks stronger than this member's other visible links."
|
| 1076 |
+
if score <= 30:
|
| 1077 |
+
return "In this filtered view, this relationship looks weaker than this member's other visible links."
|
| 1078 |
+
return "In this filtered view, this relationship looks similar to this member's other visible links."
|
| 1079 |
+
|
| 1080 |
+
|
| 1081 |
def _relationship_strengtheners(link_rows: pd.DataFrame, event_rows: pd.DataFrame) -> list[str]:
|
| 1082 |
labels = [
|
| 1083 |
_plain_strengthener(item)
|
|
|
|
| 1193 |
records = _relationship_source_records(link_rows, event_rows, str(row.get("target_label") or ""))
|
| 1194 |
selected: list[str] = []
|
| 1195 |
selected_set: set[str] = set()
|
| 1196 |
+
normalized_target = _relationship_target_key(str(row.get("target_label") or ""))
|
| 1197 |
|
| 1198 |
def choose_one(family_name: str) -> None:
|
| 1199 |
candidates = [
|
|
|
|
| 1217 |
if any(str(record.get("family") or "") == "lobbying activity" for record in records):
|
| 1218 |
choose_one("lobbying activity")
|
| 1219 |
|
| 1220 |
+
bill_like_records = [
|
| 1221 |
+
record
|
| 1222 |
+
for record in records
|
| 1223 |
+
if str(record.get("family") or "") in {"bill record", "vote activity"} and str(record.get("url") or "") not in selected_set
|
| 1224 |
+
]
|
| 1225 |
+
preferred_hints = TOPIC_AREA_PREFERRED_BILL_HINTS.get(normalized_target, ())
|
| 1226 |
+
preferred_bill_like_records = [
|
| 1227 |
+
record
|
| 1228 |
+
for record in bill_like_records
|
| 1229 |
+
if any(hint in str(record.get("url") or "").lower() for hint in preferred_hints)
|
| 1230 |
+
]
|
| 1231 |
+
if preferred_bill_like_records:
|
| 1232 |
+
chosen = sorted(
|
| 1233 |
+
preferred_bill_like_records,
|
| 1234 |
+
key=lambda item: _source_record_priority(item, str(row.get("target_label") or "")),
|
| 1235 |
+
)[0]
|
| 1236 |
+
url = str(chosen.get("url") or "")
|
| 1237 |
+
selected.append(url)
|
| 1238 |
+
selected_set.add(url)
|
| 1239 |
+
elif bill_like_records:
|
| 1240 |
+
chosen = sorted(
|
| 1241 |
+
bill_like_records,
|
| 1242 |
+
key=lambda item: _source_record_priority(item, str(row.get("target_label") or "")),
|
| 1243 |
+
)[0]
|
| 1244 |
+
url = str(chosen.get("url") or "")
|
| 1245 |
+
selected.append(url)
|
| 1246 |
+
selected_set.add(url)
|
| 1247 |
+
|
| 1248 |
fallback_records = [
|
| 1249 |
record
|
| 1250 |
for record in records
|
| 1251 |
if str(record.get("url") or "") not in selected_set
|
| 1252 |
+
and str(record.get("family") or "") not in {"bill record", "vote activity"}
|
| 1253 |
]
|
| 1254 |
family_order = {
|
| 1255 |
"bill record": 0,
|
|
|
|
| 1540 |
relative_view = str(row.get("relative view", "") or "")
|
| 1541 |
score_note = _score_help_text(ranking_mode)
|
| 1542 |
ranking_mode_note = (
|
| 1543 |
+
f"<div class=\"result-note\"><strong>Experimental relative ordering:</strong> "
|
| 1544 |
+
f"{html.escape(_relative_view_explainer(relative_score))}</div>"
|
| 1545 |
if str(ranking_mode or "raw").strip().lower() == "relative"
|
| 1546 |
else ""
|
| 1547 |
)
|
|
|
|
| 1568 |
<div><strong>Needs caution</strong>{needs_caution}</div>
|
| 1569 |
<div><strong>Unresolved refs</strong>{unresolved_refs}</div>
|
| 1570 |
<div><strong>Raw score</strong>{raw_score}</div>
|
| 1571 |
+
<div><strong>Experimental relative note</strong>{html.escape(_relative_view_explainer(relative_score))}</div>
|
| 1572 |
</div>
|
| 1573 |
<div class="result-hint">Use Explain this link below to open the detailed breakdown and export files for this relationship.</div>
|
| 1574 |
</div>
|
|
|
|
| 1627 |
f"- Public score shown on the card: `{raw_score}`",
|
| 1628 |
f"- Raw score: `{raw_score}`",
|
| 1629 |
f"- Relative-to-baseline score (experimental): `{relative_score}`",
|
| 1630 |
+
f"- Experimental relative note: {_relative_view_explainer(relative_score)}",
|
| 1631 |
f"- Supporting relationship rows: `{int(row.get('link_count', 0) or 0)}`",
|
| 1632 |
f"- Stronger-support rows: `{int(row.get('linked_count', 0) or 0) if family == 'recipient' else int(row.get('strong_event_count', 0) or 0)}`",
|
| 1633 |
f"- Caution / weaker rows: `{int(row.get('review_count', 0) or 0) if family == 'recipient' else int(row.get('weak_event_count', 0) or 0)}`",
|
|
|
|
| 1674 |
return slug or "relationship-export"
|
| 1675 |
|
| 1676 |
|
| 1677 |
+
def _export_bundle_stem(relationship_id_value: str, ranking_mode: str) -> str:
    """Build the filename stem for a relationship's export files.

    The stem is the sanitized relationship id as produced by
    ``_safe_export_stem``. When the ranking mode, after trimming and
    lower-casing, is ``"relative"``, the suffix ``-experimental-view`` is
    appended; a falsy ranking mode is treated as ``"raw"``.

    Args:
        relationship_id_value: Relationship identifier to sanitize.
        ranking_mode: Active ranking mode name; may be empty or ``None``.

    Returns:
        The stem string, with or without the experimental-view suffix.
    """
    sanitized_id = _safe_export_stem(relationship_id_value)
    normalized_mode = str(ranking_mode or "raw").strip().lower()
    return f"{sanitized_id}-experimental-view" if normalized_mode == "relative" else sanitized_id
|
| 1682 |
+
|
| 1683 |
+
|
| 1684 |
def _relationship_export_rows(
|
| 1685 |
edges: pd.DataFrame,
|
| 1686 |
links: pd.DataFrame,
|
|
|
|
| 1878 |
relationship_id_value = str(export_rows[0]["relationship_id"] or relationship_id)
|
| 1879 |
export_dir = Path(tempfile.gettempdir()) / "cmp_space_exports"
|
| 1880 |
export_dir.mkdir(parents=True, exist_ok=True)
|
| 1881 |
+
stem = _export_bundle_stem(relationship_id_value, ranking_mode)
|
| 1882 |
+
csv_path = export_dir / f"{stem}-machine.csv"
|
| 1883 |
handoff_csv_path = export_dir / f"{stem}-handoff.csv"
|
| 1884 |
+
pdf_path = export_dir / f"{stem}-summary.pdf"
|
| 1885 |
|
| 1886 |
fieldnames = [
|
| 1887 |
"relationship_id",
|
|
|
|
| 1946 |
pdf.save()
|
| 1947 |
note = (
|
| 1948 |
f"Prepared deterministic export files for `{relationship_id_value}`. "
|
| 1949 |
+
"Use the machine CSV for row-complete exports, the handoff CSV for a cleaner reporter view, and the summary PDF for a fixed printable brief."
|
| 1950 |
)
|
| 1951 |
return note, str(csv_path), str(handoff_csv_path), str(pdf_path)
|
| 1952 |
|