Spaces:

cjc0013
/

cmp

Running

App Files Files Community

cjc0013 committed on Apr 20

Commit

6a78301

verified ·

1 Parent(s): f14edf7

Refresh public Space consistency and detail copy

Browse files

Files changed (3) hide show

dataset_bundle/evidence_audit/consistency_report.json +1 -1
dataset_bundle/public_release_manifest.json +1 -1
public_space_app.py +82 -25

dataset_bundle/evidence_audit/consistency_report.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "generated_at": "2026-04-19T19:38:57-04:00",
   "event_provenance": {
     "event_count": 3918,
     "events_with_artifacts": 3878,

 {
+  "generated_at": "2026-04-19T20:18:33-04:00",
   "event_provenance": {
     "event_count": 3918,
     "events_with_artifacts": 3878,

dataset_bundle/public_release_manifest.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "public_version": "congress-public-records-slice-2026-04-v1",
   "title": "Congress Public Records Slice",
-  "release_date": "2026-04-19T19:40:15-04:00",
   "slice_description": "A neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.",
   "source_run_name": "house_all_baseline_20260418_v21_recipienthardening",
   "dataset_repo_id": "cjc0013/cmp-data",

 {
   "public_version": "congress-public-records-slice-2026-04-v1",
   "title": "Congress Public Records Slice",
+  "release_date": "2026-04-19T20:19:47-04:00",
   "slice_description": "A neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.",
   "source_run_name": "house_all_baseline_20260418_v21_recipienthardening",
   "dataset_repo_id": "cjc0013/cmp-data",

public_space_app.py CHANGED Viewed

@@ -842,7 +842,7 @@ def _plain_strengthener(value: str) -> str:
     normalized = str(value or "").strip()
     mapping = {
         "bill_sector_mapping_weak": "Requires stronger correlation between the trade window and related bill subject matter.",
-        "donor_industry_mapping_weak": "Missing granular donor industry classification.",
         "committee_history_missing": "Committee history is missing or incomplete for this row.",
         "lobbying_issue_mapping_weak": "Requires clearer mapping between lobbying issue tags and the policy area in this row.",
         "recipient_identity_ambiguous": "The recipient identity needs a cleaner match before this can be treated as a stronger link.",
@@ -868,7 +868,7 @@ def _evidence_chip_help(label: str) -> str:
         "annual disclosure": "Annual financial disclosure records support this relationship.",
         "bill record": "Bill-status records help show legislative activity in the same topic area.",
         "funding award": "Published federal award records support a funding-recipient link in this slice.",
-        "committee roster": "Committee records show committee context related to the same topic area.",
         "vote activity": "Roll-call vote records add legislative activity in the same topic window.",
         "lobbying activity": "Lobbying filings add public activity in the same issue area.",
         "member profile": "Member-published profile or committee context contributes to this relationship summary.",
@@ -881,8 +881,8 @@ def _score_help_text(ranking_mode: str) -> str:
     normalized = str(ranking_mode or "raw").strip().lower()
     if normalized == "relative":
         return (
-            "Experimental relative score. It compares this relationship with the same member's other visible "
-            "relationships in the current view so unusually strong links stand out against that member's baseline activity."
         )
     return (
         "Raw score. It favors clearer public support, more supporting rows, more integrity-checked records, "
@@ -1017,14 +1017,42 @@ def _collect_pipe_values(frame: pd.DataFrame, column: str, *, limit: int = 20) -
     return items
-def _relationship_reason_labels(link_rows: pd.DataFrame, event_rows: pd.DataFrame, row: Dict[str, Any]) -> list[str]:
     labels = [
         _plain_reason_code(item)
-        for item in _collect_pipe_values(link_rows, "reason_codes", limit=20)
-        + _collect_pipe_values(event_rows, "reason_codes", limit=20)
     ]
-    if not labels:
-        labels = [_plain_reason_code(item) for item in _split_pipe_values(row.get("reason_codes", ""), limit=20)]
     ordered: list[str] = []
     for label in labels:
         if label and label not in ordered:
@@ -1032,6 +1060,15 @@ def _relationship_reason_labels(link_rows: pd.DataFrame, event_rows: pd.DataFram
     return ordered[:10]
 def _relationship_strengtheners(link_rows: pd.DataFrame, event_rows: pd.DataFrame) -> list[str]:
     labels = [
         _plain_strengthener(item)
@@ -1216,21 +1253,21 @@ def _relationship_context(
     link_rows, event_rows = _relationship_constituents(links, events, row)
     raw_score = _relationship_score(row)
     relative_score = _relative_relationship_score(row, _member_activity_baselines(edges))
-    normalized_mode = str(ranking_mode or "raw").strip().lower()
-    display_score = relative_score if normalized_mode == "relative" else raw_score
     surfaced_urls = _select_example_urls(row, link_rows, event_rows, limit=6)
     all_urls = [record.get("url", "") for record in sorted(_relationship_source_records(link_rows, event_rows, str(row.get("target_label") or "")), key=lambda item: _source_record_priority(item, str(row.get("target_label") or "")))]
     all_urls = [url for url in all_urls if url]
-    reason_labels = _relationship_reason_labels(link_rows, event_rows, row)
     strengtheners = _relationship_strengtheners(link_rows, event_rows)
     sha_values = _relationship_sha_values(link_rows, event_rows)
-    evidence_chips = _edge_evidence_chips(row, surfaced_urls)
     link_type_mix = _relationship_link_type_mix(link_rows)
     return {
         "row": row,
         "raw_score": raw_score,
         "relative_score": relative_score,
         "display_score": display_score,
         "surfaced_urls": surfaced_urls,
         "all_urls": all_urls,
         "reason_labels": reason_labels,
@@ -1324,8 +1361,10 @@ def _rank_relationships(edges: pd.DataFrame, ranking_mode: str = "raw") -> pd.Da
         "member",
         "counterparty / sector",
         "overall score",
         "raw score",
         "relative score",
         "strength",
         "evidence",
         "time-window overlap",
@@ -1351,15 +1390,17 @@ def _rank_relationships(edges: pd.DataFrame, ranking_mode: str = "raw") -> pd.Da
         chips = _edge_evidence_chips(row)
         raw_score = _relationship_score(row)
         relative_score = _relative_relationship_score(row, baselines)
-        display_score = relative_score if normalized_mode == "relative" else raw_score
         rows.append(
             {
                 "relationship_id": str(row.get("edge_id") or ""),
                 "member": str(row.get("member_name") or row.get("member_slug") or ""),
                 "counterparty / sector": _display_target_label(row),
-                "overall score": display_score,
                 "raw score": raw_score,
                 "relative score": relative_score,
                 "status_code": str(row.get("relationship_status", "") or ""),
                 "strength": _plain_status_label(str(row.get("relationship_status", "") or "")),
                 "evidence": " | ".join(chips) if chips else "published source support",
@@ -1372,8 +1413,8 @@ def _rank_relationships(edges: pd.DataFrame, ranking_mode: str = "raw") -> pd.Da
             }
         )
     ranked = pd.DataFrame(rows).sort_values(
-        ["overall score", "supporting rows", "stronger support", "counterparty / sector"],
-        ascending=[False, False, False, True],
     ).reset_index(drop=True)
     ranked.insert(0, "rank", range(1, len(ranked) + 1))
     return ranked
@@ -1406,7 +1447,8 @@ def _overview_summary_markdown(
         f"- Showing the top `{min(int(top_n), len(ranked))}` `{_plain_family_label(family).lower()}` for `{focus_label}`.",
         f"- Filtered to stronger links only: `{str(bool(only_strong_links)).lower()}`.",
         f"- Ranking mode: `{'experimental relative to this member baseline' if str(ranking_mode or 'raw').strip().lower() == 'relative' else 'raw score'}`.",
-        f"- Highest score in this view: `{int(ranked['overall score'].max())}`.",
         "- Pick one relationship below to see the evidence breakdown and coarse evidence window.",
     ]
     if not str(member_query or "").strip():
@@ -1438,7 +1480,9 @@ def _overview_cards_html(
         f"for <strong>{html.escape(focus_label)}</strong>. "
         f"Filtered to stronger links only: <strong>{'yes' if bool(only_strong_links) else 'no'}</strong>. "
         f"Ranking mode: <strong>{'experimental relative to this member baseline' if str(ranking_mode or 'raw').strip().lower() == 'relative' else 'raw score'}</strong>. "
-        "Hover over score badges and evidence chips for why they matter. Pick one relationship below to open the plain-English explanation and evidence window."
         "</div>"
     )
     cards: list[str] = []
@@ -1454,7 +1498,14 @@ def _overview_cards_html(
         unresolved_refs = int(row.get("unresolved refs", 0) or 0)
         raw_score = int(row.get("raw score", 0) or 0)
         relative_score = int(row.get("relative score", 0) or 0)
         score_note = _score_help_text(ranking_mode)
         cards.append(
             f"""
             <div class="result-card">
@@ -1465,11 +1516,12 @@ def _overview_cards_html(
                   <div class="result-subtitle">For {html.escape(str(row.get("member", "") or ""))} in the {_plain_family_label(family).lower()} view.</div>
                 </div>
                 <div class="metric-stack">
-                  <span class="score-pill" title="{html.escape(score_note)}">Score {int(row.get("overall score", 0) or 0)}</span>
                   <span class="strength-pill" title="{html.escape(_plain_status_explainer(str(row.get('status_code', '') or '')))}">{html.escape(str(row.get("strength", "") or ""))}</span>
                 </div>
               </div>
               <div class="chip-row">{chip_html or '<span class="chip">published source support</span>'}</div>
               <div class="meta-grid">
                 <div><strong>Evidence window</strong>{html.escape(str(row.get("time-window overlap", "") or ""))}</div>
                 <div><strong>Supporting rows</strong>{supporting_rows}</div>
@@ -1477,7 +1529,7 @@ def _overview_cards_html(
                 <div><strong>Needs caution</strong>{needs_caution}</div>
                 <div><strong>Unresolved refs</strong>{unresolved_refs}</div>
                 <div><strong>Raw score</strong>{raw_score}</div>
-                <div><strong>Relative score</strong>{relative_score}</div>
               </div>
               <div class="result-hint">Use Explain this link below to open the detailed breakdown and export files for this relationship.</div>
             </div>
@@ -1496,7 +1548,7 @@ def _relationship_options(ranked: pd.DataFrame) -> list[tuple[str, str]]:
         return []
     options: list[tuple[str, str]] = []
     for row in ranked.to_dict("records"):
-        label = f"#{int(row['rank'])} {row['counterparty / sector']} - {row['strength']} (score {row['overall score']})"
         options.append((label, str(row["relationship_id"])))
     return options
@@ -1527,15 +1579,16 @@ def _relationship_detail_markdown(
     urls = context["surfaced_urls"]
     raw_score = int(context["raw_score"])
     relative_score = int(context["relative_score"])
-    display_score = int(context["display_score"])
     lines = [
         f"### {row.get('member_name') or row.get('member_slug')} -> {context['display_target_label']}",
         "",
         f"- Relationship view: `{_plain_family_label(family)}`",
         f"- Strength label: `{_plain_status_label(str(row.get('relationship_status', '') or ''))}`",
-        f"- Displayed score in this view: `{display_score}`",
         f"- Raw score: `{raw_score}`",
         f"- Relative-to-baseline score (experimental): `{relative_score}`",
         f"- Supporting relationship rows: `{int(row.get('link_count', 0) or 0)}`",
         f"- Stronger-support rows: `{int(row.get('linked_count', 0) or 0) if family == 'recipient' else int(row.get('strong_event_count', 0) or 0)}`",
         f"- Caution / weaker rows: `{int(row.get('review_count', 0) or 0) if family == 'recipient' else int(row.get('weak_event_count', 0) or 0)}`",
@@ -1548,6 +1601,10 @@ def _relationship_detail_markdown(
         lines.append(f"- Released row kinds involved: `{'; '.join(context['link_type_mix'])}`")
     if context["topic_area_note"]:
         lines.append(f"- Topic-area note: {context['topic_area_note']}")
     if reason_codes:
         lines.extend(["", "#### Why it is linked in this slice", ""])
         lines.extend(f"- {item}" for item in reason_codes)
@@ -1864,7 +1921,7 @@ def _timeline_window_from_url(url: str) -> tuple[int, str, str]:
     if "usaspending.gov/award/" in normalized:
         return (40, "Published award record", "Federal award record")
     if "committee_info" in normalized:
-        return (50, "Current committee reference", "Committee roster")
     return (60, "Published source", urlparse(normalized).netloc if normalized.startswith("http") else "Published source")

     normalized = str(value or "").strip()
     mapping = {
         "bill_sector_mapping_weak": "Requires stronger correlation between the trade window and related bill subject matter.",
+        "donor_industry_mapping_weak": "More granular industry tagging would improve precision.",
         "committee_history_missing": "Committee history is missing or incomplete for this row.",
         "lobbying_issue_mapping_weak": "Requires clearer mapping between lobbying issue tags and the policy area in this row.",
         "recipient_identity_ambiguous": "The recipient identity needs a cleaner match before this can be treated as a stronger link.",
         "annual disclosure": "Annual financial disclosure records support this relationship.",
         "bill record": "Bill-status records help show legislative activity in the same topic area.",
         "funding award": "Published federal award records support a funding-recipient link in this slice.",
+        "committee roster": "Committee records here provide current committee context. They are not presented as exact time-overlap proof.",
         "vote activity": "Roll-call vote records add legislative activity in the same topic window.",
         "lobbying activity": "Lobbying filings add public activity in the same issue area.",
         "member profile": "Member-published profile or committee context contributes to this relationship summary.",
     normalized = str(ranking_mode or "raw").strip().lower()
     if normalized == "relative":
         return (
+            "Raw score is still the main public score shown on the card. Experimental relative ordering only changes how the list is sorted "
+            "compared with the same member's other visible relationships in the current view."
         )
     return (
         "Raw score. It favors clearer public support, more supporting rows, more integrity-checked records, "
     return items
+def _relationship_reason_codes(link_rows: pd.DataFrame, event_rows: pd.DataFrame, row: Dict[str, Any]) -> list[str]:
+    """Return the distinct raw reason codes for one relationship, in first-seen order.
+
+    Codes are read from the ``reason_codes`` column of ``link_rows`` and then
+    ``event_rows`` (at most 20 values from each). Only when both yield nothing
+    does the function fall back to the ``reason_codes`` value on ``row`` itself.
+    Each code is stringified and stripped; blanks and repeats are dropped.
+    At most 12 codes are returned.
+    """
+    # Link-row codes come first, so they win the ordering over event-row codes.
+    codes = _collect_pipe_values(link_rows, "reason_codes", limit=20) + _collect_pipe_values(event_rows, "reason_codes", limit=20)
+    if not codes:
+        # Fallback: use the edge row's own value (presumably pipe-delimited; confirm against _split_pipe_values).
+        codes = _split_pipe_values(row.get("reason_codes", ""), limit=20)
+    ordered: list[str] = []
+    for code in codes:
+        # `code or ""` turns None/empty entries into "" so they are filtered out below.
+        normalized = str(code or "").strip()
+        if normalized and normalized not in ordered:
+            ordered.append(normalized)
+    # The cap is applied after de-duplication, so it counts distinct codes.
+    return ordered[:12]
+def _reason_visible_in_public_card(reason_code: str, evidence_chips: list[str]) -> bool:
+    """Decide whether a reason code should be shown given the evidence chips present.
+
+    Reason codes listed in ``requirements`` are shown only if at least one of
+    their associated chip labels appears in ``evidence_chips``. Any reason code
+    not listed there is always shown.
+
+    Chip labels are compared after stripping and lower-casing. The reason code
+    is stripped but not lower-cased, so its lookup is case-sensitive.
+    """
+    chip_set = {str(item or "").strip().lower() for item in evidence_chips}
+    # reason code -> chip labels; one matching chip is enough (see the intersection below).
+    requirements = {
+        "committee_jurisdiction_match": {"committee roster"},
+        "major_vote_overlap": {"vote activity"},
+        "vote_density_support": {"vote activity"},
+        "lobbying_issue_overlap": {"lobbying activity"},
+        "lobbying_density_support": {"lobbying activity"},
+        "bill_sponsor_overlap": {"bill record", "vote activity"},
+        "legislative_relevance_match": {"bill record", "vote activity"},
+        "legislative_density_support": {"bill record", "vote activity"},
+    }
+    required = requirements.get(str(reason_code or "").strip())
+    if not required:
+        # No chip requirement is registered for this code, so it is never hidden.
+        return True
+    return bool(chip_set.intersection(required))
+def _relationship_reason_labels(link_rows: pd.DataFrame, event_rows: pd.DataFrame, row: Dict[str, Any], evidence_chips: list[str]) -> list[str]:
     labels = [
         _plain_reason_code(item)
+        for item in _relationship_reason_codes(link_rows, event_rows, row)
+        if _reason_visible_in_public_card(item, evidence_chips)
     ]
     ordered: list[str] = []
     for label in labels:
         if label and label not in ordered:
     return ordered[:10]
+def _relative_bucket(value: int) -> str:
+    """Map a relative score to a plain-English phrase about the member's baseline.
+
+    Scores of 70 or more read as "above", scores of 30 or less as "below",
+    and anything in between as "near". A falsy ``value`` (``None``, ``0``, ``""``)
+    is treated as 0 and therefore lands in the "below" bucket.
+    """
+    # `value or 0` guards against None/empty input before the int() conversion.
+    score = int(value or 0)
+    if score >= 70:
+        return "above this member's baseline"
+    if score <= 30:
+        return "below this member's baseline"
+    return "near this member's baseline"
 def _relationship_strengtheners(link_rows: pd.DataFrame, event_rows: pd.DataFrame) -> list[str]:
     labels = [
         _plain_strengthener(item)
     link_rows, event_rows = _relationship_constituents(links, events, row)
     raw_score = _relationship_score(row)
     relative_score = _relative_relationship_score(row, _member_activity_baselines(edges))
+    display_score = raw_score
     surfaced_urls = _select_example_urls(row, link_rows, event_rows, limit=6)
     all_urls = [record.get("url", "") for record in sorted(_relationship_source_records(link_rows, event_rows, str(row.get("target_label") or "")), key=lambda item: _source_record_priority(item, str(row.get("target_label") or "")))]
     all_urls = [url for url in all_urls if url]
+    evidence_chips = _edge_evidence_chips(row, surfaced_urls)
+    reason_labels = _relationship_reason_labels(link_rows, event_rows, row, evidence_chips)
     strengtheners = _relationship_strengtheners(link_rows, event_rows)
     sha_values = _relationship_sha_values(link_rows, event_rows)
     link_type_mix = _relationship_link_type_mix(link_rows)
     return {
         "row": row,
         "raw_score": raw_score,
         "relative_score": relative_score,
         "display_score": display_score,
+        "relative_bucket": _relative_bucket(relative_score),
         "surfaced_urls": surfaced_urls,
         "all_urls": all_urls,
         "reason_labels": reason_labels,
         "member",
         "counterparty / sector",
         "overall score",
+        "sort score",
         "raw score",
         "relative score",
+        "relative view",
         "strength",
         "evidence",
         "time-window overlap",
         chips = _edge_evidence_chips(row)
         raw_score = _relationship_score(row)
         relative_score = _relative_relationship_score(row, baselines)
+        sort_score = relative_score if normalized_mode == "relative" else raw_score
         rows.append(
             {
                 "relationship_id": str(row.get("edge_id") or ""),
                 "member": str(row.get("member_name") or row.get("member_slug") or ""),
                 "counterparty / sector": _display_target_label(row),
+                "overall score": raw_score,
+                "sort score": sort_score,
                 "raw score": raw_score,
                 "relative score": relative_score,
+                "relative view": _relative_bucket(relative_score),
                 "status_code": str(row.get("relationship_status", "") or ""),
                 "strength": _plain_status_label(str(row.get("relationship_status", "") or "")),
                 "evidence": " | ".join(chips) if chips else "published source support",
             }
         )
     ranked = pd.DataFrame(rows).sort_values(
+        ["sort score", "overall score", "supporting rows", "stronger support", "counterparty / sector"],
+        ascending=[False, False, False, False, True],
     ).reset_index(drop=True)
     ranked.insert(0, "rank", range(1, len(ranked) + 1))
     return ranked
         f"- Showing the top `{min(int(top_n), len(ranked))}` `{_plain_family_label(family).lower()}` for `{focus_label}`.",
         f"- Filtered to stronger links only: `{str(bool(only_strong_links)).lower()}`.",
         f"- Ranking mode: `{'experimental relative to this member baseline' if str(ranking_mode or 'raw').strip().lower() == 'relative' else 'raw score'}`.",
+        f"- Highest raw score in this view: `{int(ranked['overall score'].max())}`.",
+        "- `Only stronger links` filters by the overall relationship bucket. A visible card can still include some caution rows inside it.",
         "- Pick one relationship below to see the evidence breakdown and coarse evidence window.",
     ]
     if not str(member_query or "").strip():
         f"for <strong>{html.escape(focus_label)}</strong>. "
         f"Filtered to stronger links only: <strong>{'yes' if bool(only_strong_links) else 'no'}</strong>. "
         f"Ranking mode: <strong>{'experimental relative to this member baseline' if str(ranking_mode or 'raw').strip().lower() == 'relative' else 'raw score'}</strong>. "
+        "Hover over score badges and evidence chips for why they matter. "
+        "A card can still include some caution rows here because the stronger-only filter applies to the overall relationship bucket, not every contributing row. "
+        "Pick one relationship below to open the plain-English explanation and evidence window."
         "</div>"
     )
     cards: list[str] = []
         unresolved_refs = int(row.get("unresolved refs", 0) or 0)
         raw_score = int(row.get("raw score", 0) or 0)
         relative_score = int(row.get("relative score", 0) or 0)
+        relative_view = str(row.get("relative view", "") or "")
         score_note = _score_help_text(ranking_mode)
+        ranking_mode_note = (
+            f"<div class=\"result-note\"><strong>Experimental relative ordering:</strong> this relationship currently ranks "
+            f"<strong>{html.escape(relative_view)}</strong> ({relative_score}) compared with the same member's other visible links.</div>"
+            if str(ranking_mode or "raw").strip().lower() == "relative"
+            else ""
+        )
         cards.append(
             f"""
             <div class="result-card">
                   <div class="result-subtitle">For {html.escape(str(row.get("member", "") or ""))} in the {_plain_family_label(family).lower()} view.</div>
                 </div>
                 <div class="metric-stack">
+                  <span class="score-pill" title="{html.escape(score_note)}">Raw score {raw_score}</span>
                   <span class="strength-pill" title="{html.escape(_plain_status_explainer(str(row.get('status_code', '') or '')))}">{html.escape(str(row.get("strength", "") or ""))}</span>
                 </div>
               </div>
               <div class="chip-row">{chip_html or '<span class="chip">published source support</span>'}</div>
+              {ranking_mode_note}
               <div class="meta-grid">
                 <div><strong>Evidence window</strong>{html.escape(str(row.get("time-window overlap", "") or ""))}</div>
                 <div><strong>Supporting rows</strong>{supporting_rows}</div>
                 <div><strong>Needs caution</strong>{needs_caution}</div>
                 <div><strong>Unresolved refs</strong>{unresolved_refs}</div>
                 <div><strong>Raw score</strong>{raw_score}</div>
+                <div><strong>Relative view</strong>{html.escape(relative_view)} ({relative_score})</div>
               </div>
               <div class="result-hint">Use Explain this link below to open the detailed breakdown and export files for this relationship.</div>
             </div>
         return []
     options: list[tuple[str, str]] = []
     for row in ranked.to_dict("records"):
+        label = f"#{int(row['rank'])} {row['counterparty / sector']} - {row['strength']} (raw {row['overall score']})"
         options.append((label, str(row["relationship_id"])))
     return options
     urls = context["surfaced_urls"]
     raw_score = int(context["raw_score"])
     relative_score = int(context["relative_score"])
     lines = [
         f"### {row.get('member_name') or row.get('member_slug')} -> {context['display_target_label']}",
         "",
+        "- This is a lead for inspection, not a claim of wrongdoing, intent, causality, or exact chronology.",
         f"- Relationship view: `{_plain_family_label(family)}`",
         f"- Strength label: `{_plain_status_label(str(row.get('relationship_status', '') or ''))}`",
+        f"- Public score shown on the card: `{raw_score}`",
         f"- Raw score: `{raw_score}`",
         f"- Relative-to-baseline score (experimental): `{relative_score}`",
+        f"- Relative view in the current filter set: `{context['relative_bucket']}`",
         f"- Supporting relationship rows: `{int(row.get('link_count', 0) or 0)}`",
         f"- Stronger-support rows: `{int(row.get('linked_count', 0) or 0) if family == 'recipient' else int(row.get('strong_event_count', 0) or 0)}`",
         f"- Caution / weaker rows: `{int(row.get('review_count', 0) or 0) if family == 'recipient' else int(row.get('weak_event_count', 0) or 0)}`",
         lines.append(f"- Released row kinds involved: `{'; '.join(context['link_type_mix'])}`")
     if context["topic_area_note"]:
         lines.append(f"- Topic-area note: {context['topic_area_note']}")
+    if "committee roster" in chips:
+        lines.append(
+            "- Committee context note: committee records shown here provide current reference context and are not part of an exact time-overlap claim."
+        )
     if reason_codes:
         lines.extend(["", "#### Why it is linked in this slice", ""])
         lines.extend(f"- {item}" for item in reason_codes)
     if "usaspending.gov/award/" in normalized:
         return (40, "Published award record", "Federal award record")
     if "committee_info" in normalized:
+        return (50, "Current reference only", "Committee context (not part of the time-overlap claim)")
     return (60, "Published source", urlparse(normalized).netloc if normalized.startswith("http") else "Published source")