Add plain-English what-this-is framing to Space
Browse files
- public_copy.json +1 -1
- public_space_app.py +102 -0
public_copy.json
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
"subtitle": "Neutral Records explorer for a public-record slice of congressional money-and-power linkages.",
|
| 5 |
"dataset_repo_id": "cjc0013/cmp-data",
|
| 6 |
"space_repo_id": "cjc0013/cmp",
|
| 7 |
-
"welcome_markdown": "# Congress Public Records Slice\n\nStart with **
|
| 8 |
"landing_markdown": "# Congress Public Records Slice\n\nA neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.\n\n- This release is a slice of public-record data, not a complete accounting of all potentially relevant data.\n- Future releases may update or expand this slice as source recovery, parsing, and evidence linkage improve.\n- This release does not assign guilt, wrongdoing, intent, or causality to any person or organization.\n- The release shows public-record overlaps, timing, and linkage strength, not proof of illegality or corruption.\n- Some rows remain review-tier or include unresolved official source references and should be read with those labels in mind.\n- The public package includes verification summaries and SHA-backed artifact indexes, but it does not include the full internal raw corpus, so external verification is bounded by what is published here.",
|
| 9 |
"downloads_markdown": "## Downloads\n\n- Dataset repo id: `cjc0013/cmp-data`\n- Space repo id: `cjc0013/cmp`\n\nUse the dataset bundle files for direct review, CSV download, and SHA-backed source checks.",
|
| 10 |
"dataset_bundle_prefix": "dataset_bundle"
|
|
|
|
| 4 |
"subtitle": "Neutral Records explorer for a public-record slice of congressional money-and-power linkages.",
|
| 5 |
"dataset_repo_id": "cjc0013/cmp-data",
|
| 6 |
"space_repo_id": "cjc0013/cmp",
|
| 7 |
+
"welcome_markdown": "# Congress Public Records Slice\n\nStart with **What This Is**, then use **Overview**.\n\n- **What This Is** explains, in plain English, what this release contains and what it does not claim.\n- Pick one House member first.\n- Use **Overview** to see the strongest sectors or funding recipients for that member.\n- Use **Explain Link** to see why one relationship appears in this released slice.\n- Use **Explore Graph** only if you want a secondary visual map.\n\nThis is an exploration tool, not an accusation tool.",
|
| 8 |
"landing_markdown": "# Congress Public Records Slice\n\nA neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.\n\n- This release is a slice of public-record data, not a complete accounting of all potentially relevant data.\n- Future releases may update or expand this slice as source recovery, parsing, and evidence linkage improve.\n- This release does not assign guilt, wrongdoing, intent, or causality to any person or organization.\n- The release shows public-record overlaps, timing, and linkage strength, not proof of illegality or corruption.\n- Some rows remain review-tier or include unresolved official source references and should be read with those labels in mind.\n- The public package includes verification summaries and SHA-backed artifact indexes, but it does not include the full internal raw corpus, so external verification is bounded by what is published here.",
|
| 9 |
"downloads_markdown": "## Downloads\n\n- Dataset repo id: `cjc0013/cmp-data`\n- Space repo id: `cjc0013/cmp`\n\nUse the dataset bundle files for direct review, CSV download, and SHA-backed source checks.",
|
| 10 |
"dataset_bundle_prefix": "dataset_bundle"
|
public_space_app.py
CHANGED
|
@@ -86,6 +86,102 @@ def _member_search_mask(frame: pd.DataFrame, query: str) -> pd.Series:
|
|
| 86 |
return name_series.str.contains(query, case=False, na=False) | slug_series.str.contains(query, case=False, na=False)
|
| 87 |
|
| 88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
def _plain_status_label(value: str) -> str:
|
| 90 |
normalized = str(value or "").strip()
|
| 91 |
mapping = {
|
|
@@ -778,6 +874,7 @@ def _event_detail(events: pd.DataFrame, provenance: pd.DataFrame, event_id: str)
|
|
| 778 |
|
| 779 |
def build_app(copy_path: str | Path):
|
| 780 |
data = load_release_data(copy_path)
|
|
|
|
| 781 |
events = data["events"]
|
| 782 |
nodes = data["graph_nodes"]
|
| 783 |
edges = data["graph_edges"]
|
|
@@ -810,6 +907,10 @@ def build_app(copy_path: str | Path):
|
|
| 810 |
|
| 811 |
with gr.Blocks(title=copy_payload.get("title", "Congress Public Records Slice")) as app:
|
| 812 |
gr.Markdown(copy_payload.get("welcome_markdown", copy_payload.get("landing_markdown", "")))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 813 |
with gr.Tab("Overview"):
|
| 814 |
gr.Markdown(
|
| 815 |
"### Start here\n\n"
|
|
@@ -935,6 +1036,7 @@ def build_app(copy_path: str | Path):
|
|
| 935 |
gr.Dataframe(value=data["artifact_index"].head(200), interactive=False)
|
| 936 |
with gr.Tab("Methodology & Limits"):
|
| 937 |
gr.Markdown(copy_payload.get("landing_markdown", ""))
|
|
|
|
| 938 |
gr.Markdown(copy_payload.get("downloads_markdown", ""))
|
| 939 |
with gr.Tab("Downloads"):
|
| 940 |
gr.Markdown(copy_payload.get("downloads_markdown", ""))
|
|
|
|
| 86 |
return name_series.str.contains(query, case=False, na=False) | slug_series.str.contains(query, case=False, na=False)
|
| 87 |
|
| 88 |
|
| 89 |
+
def _split_source_group_lines(text: Any) -> list[str]:
|
| 90 |
+
lines = []
|
| 91 |
+
for raw_line in str(text or "").splitlines():
|
| 92 |
+
cleaned = raw_line.strip().lstrip("-").strip()
|
| 93 |
+
if cleaned:
|
| 94 |
+
lines.append(cleaned)
|
| 95 |
+
if "USAspending award pages used for some recipient matching" not in lines:
|
| 96 |
+
lines.append("USAspending award pages used for some recipient matching")
|
| 97 |
+
return lines
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def _about_release_markdown(
|
| 101 |
+
manifest: Dict[str, Any],
|
| 102 |
+
recipient_link_quality: Dict[str, Any],
|
| 103 |
+
source_quality: Dict[str, Any],
|
| 104 |
+
) -> str:
|
| 105 |
+
counts = manifest.get("counts") or {}
|
| 106 |
+
caveats = manifest.get("caveats") or []
|
| 107 |
+
label_counts = recipient_link_quality.get("label_counts") or {}
|
| 108 |
+
return "\n".join(
|
| 109 |
+
[
|
| 110 |
+
"## What this is",
|
| 111 |
+
"",
|
| 112 |
+
"This tool is a public-records explorer for one released slice of House data.",
|
| 113 |
+
"",
|
| 114 |
+
"It brings together records about House members, financial disclosures, legislative activity, lobbying visibility, and some community project funding recipient relationships.",
|
| 115 |
+
"",
|
| 116 |
+
"The goal is simple: help you inspect where public records overlap, then click through to the published source URLs and SHA-backed artifacts.",
|
| 117 |
+
"",
|
| 118 |
+
"It does **not** accuse anyone of a crime, corruption, or wrongdoing.",
|
| 119 |
+
"It does **not** prove intent or causality.",
|
| 120 |
+
"It does **not** claim this is the full universe of relevant data.",
|
| 121 |
+
"",
|
| 122 |
+
"## What is in this release",
|
| 123 |
+
"",
|
| 124 |
+
f"- House members in this slice: `{int(counts.get('members', 0) or 0)}`",
|
| 125 |
+
f"- Released scored event rows: `{int(counts.get('scored_events', 0) or 0)}`",
|
| 126 |
+
f"- Released relationship rows: `{int(counts.get('graph_links', 0) or 0)}`",
|
| 127 |
+
f"- Public source artifacts in the audit index: `{int(counts.get('source_artifacts', 0) or 0)}`",
|
| 128 |
+
"",
|
| 129 |
+
"## What the app views mean",
|
| 130 |
+
"",
|
| 131 |
+
"- **Overview**: ranked sectors or funding recipients for one House member at a time.",
|
| 132 |
+
"- **Explain Link**: plain-English reasons and a coarse evidence window for one selected relationship.",
|
| 133 |
+
"- **Explore Graph**: optional visual map if you want to explore relationships spatially.",
|
| 134 |
+
"- **Search Events**: raw released event rows for deeper inspection.",
|
| 135 |
+
"- **Event Detail / Audit**: source URLs, SHA-backed artifacts, and consistency checks.",
|
| 136 |
+
"",
|
| 137 |
+
"## Important limits",
|
| 138 |
+
"",
|
| 139 |
+
f"- Relationship rows still marked needs review: `{int(label_counts.get('recipient_match_needs_review', 0) or 0)}`",
|
| 140 |
+
f"- True parse failures still present in the source slice: `{int(source_quality.get('parse_failure_count', 0) or 0)}`",
|
| 141 |
+
*[f"- {item}" for item in caveats[:4]],
|
| 142 |
+
]
|
| 143 |
+
)
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
def _data_used_markdown(manifest: Dict[str, Any]) -> str:
    """Build the "What data is used here" markdown for the release.

    Args:
        manifest: Read for ``"methodology_summary"``, whose ``"source_groups"``
            value is passed to ``_split_source_group_lines`` to produce the
            bulleted source list.

    Returns:
        Markdown text joined with newlines: the source-group bullets followed
        by a fixed guide to the published files.
    """
    methodology = manifest.get("methodology_summary") or {}
    group_bullets = [
        f"- {group}"
        for group in _split_source_group_lines(methodology.get("source_groups"))
    ]

    heading = [
        "## What data is used here",
        "",
        "This release uses public records from these source groups:",
        "",
    ]
    file_guide = [
        "",
        "## How those records show up in this release",
        "",
        "- `members.csv`: one row per House member in this slice.",
        "- `scored_events.csv`: row-level overlaps or signals that survived into the public release.",
        "- `graph_links.csv`: relationship-level rows aggregated from the event layer.",
        "- `evidence_audit/*`: source URLs, SHA-256 values, and public-safe provenance rows for verification.",
        "",
        "Not every internal raw record is published here. The public package is a bounded, sanitized release layer.",
    ]
    return "\n".join(heading + group_bullets + file_guide)
|
| 167 |
+
|
| 168 |
+
|
| 169 |
+
def _how_to_use_markdown() -> str:
|
| 170 |
+
return "\n".join(
|
| 171 |
+
[
|
| 172 |
+
"## How to read this",
|
| 173 |
+
"",
|
| 174 |
+
"1. Pick one House member.",
|
| 175 |
+
"2. Start in **Overview** and look at the top sectors or funding recipients.",
|
| 176 |
+
"3. Click a relationship in **Relationship to explain**.",
|
| 177 |
+
"4. Read the evidence breakdown and the coarse evidence window.",
|
| 178 |
+
"5. Use the source URLs and SHA-backed artifacts if you want to verify it yourself.",
|
| 179 |
+
"",
|
| 180 |
+
"The safest way to interpret this release is as a map of documented public-record relationships, not a verdict.",
|
| 181 |
+
]
|
| 182 |
+
)
|
| 183 |
+
|
| 184 |
+
|
| 185 |
def _plain_status_label(value: str) -> str:
|
| 186 |
normalized = str(value or "").strip()
|
| 187 |
mapping = {
|
|
|
|
| 874 |
|
| 875 |
def build_app(copy_path: str | Path):
|
| 876 |
data = load_release_data(copy_path)
|
| 877 |
+
manifest = data["manifest"]
|
| 878 |
events = data["events"]
|
| 879 |
nodes = data["graph_nodes"]
|
| 880 |
edges = data["graph_edges"]
|
|
|
|
| 907 |
|
| 908 |
with gr.Blocks(title=copy_payload.get("title", "Congress Public Records Slice")) as app:
|
| 909 |
gr.Markdown(copy_payload.get("welcome_markdown", copy_payload.get("landing_markdown", "")))
|
| 910 |
+
with gr.Tab("What This Is"):
|
| 911 |
+
gr.Markdown(_about_release_markdown(manifest, data["recipient_link_quality"], data["source_quality"]))
|
| 912 |
+
gr.Markdown(_data_used_markdown(manifest))
|
| 913 |
+
gr.Markdown(_how_to_use_markdown())
|
| 914 |
with gr.Tab("Overview"):
|
| 915 |
gr.Markdown(
|
| 916 |
"### Start here\n\n"
|
|
|
|
| 1036 |
gr.Dataframe(value=data["artifact_index"].head(200), interactive=False)
|
| 1037 |
with gr.Tab("Methodology & Limits"):
|
| 1038 |
gr.Markdown(copy_payload.get("landing_markdown", ""))
|
| 1039 |
+
gr.Markdown(_data_used_markdown(manifest))
|
| 1040 |
gr.Markdown(copy_payload.get("downloads_markdown", ""))
|
| 1041 |
with gr.Tab("Downloads"):
|
| 1042 |
gr.Markdown(copy_payload.get("downloads_markdown", ""))
|