cjc0013 committed on
Commit
7705bfb
·
verified ·
1 Parent(s): 86f2170

Add plain-English what-this-is framing to Space

Browse files
Files changed (2) hide show
  1. public_copy.json +1 -1
  2. public_space_app.py +102 -0
public_copy.json CHANGED
@@ -4,7 +4,7 @@
4
  "subtitle": "Neutral Records explorer for a public-record slice of congressional money-and-power linkages.",
5
  "dataset_repo_id": "cjc0013/cmp-data",
6
  "space_repo_id": "cjc0013/cmp",
7
- "welcome_markdown": "# Congress Public Records Slice\n\nStart with **Overview** for the clearest read.\n\n- Pick one House member first.\n- Use **Overview** to see the strongest sectors or funding recipients for that member.\n- Use **Explain Link** to see why one relationship appears in this released slice.\n- Use **Explore Graph** only if you want a secondary visual map.\n\nThis is an exploration tool, not an accusation tool.",
8
  "landing_markdown": "# Congress Public Records Slice\n\nA neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.\n\n- This release is a slice of public-record data, not a complete accounting of all potentially relevant data.\n- Future releases may update or expand this slice as source recovery, parsing, and evidence linkage improve.\n- This release does not assign guilt, wrongdoing, intent, or causality to any person or organization.\n- The release shows public-record overlaps, timing, and linkage strength, not proof of illegality or corruption.\n- Some rows remain review-tier or include unresolved official source references and should be read with those labels in mind.\n- The public package includes verification summaries and SHA-backed artifact indexes, but it does not include the full internal raw corpus, so external verification is bounded by what is published here.",
9
  "downloads_markdown": "## Downloads\n\n- Dataset repo id: `cjc0013/cmp-data`\n- Space repo id: `cjc0013/cmp`\n\nUse the dataset bundle files for direct review, CSV download, and SHA-backed source checks.",
10
  "dataset_bundle_prefix": "dataset_bundle"
 
4
  "subtitle": "Neutral Records explorer for a public-record slice of congressional money-and-power linkages.",
5
  "dataset_repo_id": "cjc0013/cmp-data",
6
  "space_repo_id": "cjc0013/cmp",
7
+ "welcome_markdown": "# Congress Public Records Slice\n\nStart with **What This Is**, then use **Overview**.\n\n- **What This Is** explains, in plain English, what this release contains and what it does not claim.\n- Pick one House member first.\n- Use **Overview** to see the strongest sectors or funding recipients for that member.\n- Use **Explain Link** to see why one relationship appears in this released slice.\n- Use **Explore Graph** only if you want a secondary visual map.\n\nThis is an exploration tool, not an accusation tool.",
8
  "landing_markdown": "# Congress Public Records Slice\n\nA neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.\n\n- This release is a slice of public-record data, not a complete accounting of all potentially relevant data.\n- Future releases may update or expand this slice as source recovery, parsing, and evidence linkage improve.\n- This release does not assign guilt, wrongdoing, intent, or causality to any person or organization.\n- The release shows public-record overlaps, timing, and linkage strength, not proof of illegality or corruption.\n- Some rows remain review-tier or include unresolved official source references and should be read with those labels in mind.\n- The public package includes verification summaries and SHA-backed artifact indexes, but it does not include the full internal raw corpus, so external verification is bounded by what is published here.",
9
  "downloads_markdown": "## Downloads\n\n- Dataset repo id: `cjc0013/cmp-data`\n- Space repo id: `cjc0013/cmp`\n\nUse the dataset bundle files for direct review, CSV download, and SHA-backed source checks.",
10
  "dataset_bundle_prefix": "dataset_bundle"
public_space_app.py CHANGED
@@ -86,6 +86,102 @@ def _member_search_mask(frame: pd.DataFrame, query: str) -> pd.Series:
86
  return name_series.str.contains(query, case=False, na=False) | slug_series.str.contains(query, case=False, na=False)
87
 
88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  def _plain_status_label(value: str) -> str:
90
  normalized = str(value or "").strip()
91
  mapping = {
@@ -778,6 +874,7 @@ def _event_detail(events: pd.DataFrame, provenance: pd.DataFrame, event_id: str)
778
 
779
  def build_app(copy_path: str | Path):
780
  data = load_release_data(copy_path)
 
781
  events = data["events"]
782
  nodes = data["graph_nodes"]
783
  edges = data["graph_edges"]
@@ -810,6 +907,10 @@ def build_app(copy_path: str | Path):
810
 
811
  with gr.Blocks(title=copy_payload.get("title", "Congress Public Records Slice")) as app:
812
  gr.Markdown(copy_payload.get("welcome_markdown", copy_payload.get("landing_markdown", "")))
 
 
 
 
813
  with gr.Tab("Overview"):
814
  gr.Markdown(
815
  "### Start here\n\n"
@@ -935,6 +1036,7 @@ def build_app(copy_path: str | Path):
935
  gr.Dataframe(value=data["artifact_index"].head(200), interactive=False)
936
  with gr.Tab("Methodology & Limits"):
937
  gr.Markdown(copy_payload.get("landing_markdown", ""))
 
938
  gr.Markdown(copy_payload.get("downloads_markdown", ""))
939
  with gr.Tab("Downloads"):
940
  gr.Markdown(copy_payload.get("downloads_markdown", ""))
 
86
  return name_series.str.contains(query, case=False, na=False) | slug_series.str.contains(query, case=False, na=False)
87
 
88
 
89
+ def _split_source_group_lines(text: Any) -> list[str]:
90
+ lines = []
91
+ for raw_line in str(text or "").splitlines():
92
+ cleaned = raw_line.strip().lstrip("-").strip()
93
+ if cleaned:
94
+ lines.append(cleaned)
95
+ if "USAspending award pages used for some recipient matching" not in lines:
96
+ lines.append("USAspending award pages used for some recipient matching")
97
+ return lines
98
+
99
+
100
+ def _about_release_markdown(
101
+ manifest: Dict[str, Any],
102
+ recipient_link_quality: Dict[str, Any],
103
+ source_quality: Dict[str, Any],
104
+ ) -> str:
105
+ counts = manifest.get("counts") or {}
106
+ caveats = manifest.get("caveats") or []
107
+ label_counts = recipient_link_quality.get("label_counts") or {}
108
+ return "\n".join(
109
+ [
110
+ "## What this is",
111
+ "",
112
+ "This tool is a public-records explorer for one released slice of House data.",
113
+ "",
114
+ "It brings together records about House members, financial disclosures, legislative activity, lobbying visibility, and some community project funding recipient relationships.",
115
+ "",
116
+ "The goal is simple: help you inspect where public records overlap, then click through to the published source URLs and SHA-backed artifacts.",
117
+ "",
118
+ "It does **not** accuse anyone of a crime, corruption, or wrongdoing.",
119
+ "It does **not** prove intent or causality.",
120
+ "It does **not** claim this is the full universe of relevant data.",
121
+ "",
122
+ "## What is in this release",
123
+ "",
124
+ f"- House members in this slice: `{int(counts.get('members', 0) or 0)}`",
125
+ f"- Released scored event rows: `{int(counts.get('scored_events', 0) or 0)}`",
126
+ f"- Released relationship rows: `{int(counts.get('graph_links', 0) or 0)}`",
127
+ f"- Public source artifacts in the audit index: `{int(counts.get('source_artifacts', 0) or 0)}`",
128
+ "",
129
+ "## What the app views mean",
130
+ "",
131
+ "- **Overview**: ranked sectors or funding recipients for one House member at a time.",
132
+ "- **Explain Link**: plain-English reasons and a coarse evidence window for one selected relationship.",
133
+ "- **Explore Graph**: optional visual map if you want to explore relationships spatially.",
134
+ "- **Search Events**: raw released event rows for deeper inspection.",
135
+ "- **Event Detail / Audit**: source URLs, SHA-backed artifacts, and consistency checks.",
136
+ "",
137
+ "## Important limits",
138
+ "",
139
+ f"- Relationship rows still marked needs review: `{int(label_counts.get('recipient_match_needs_review', 0) or 0)}`",
140
+ f"- True parse failures still present in the source slice: `{int(source_quality.get('parse_failure_count', 0) or 0)}`",
141
+ *[f"- {item}" for item in caveats[:4]],
142
+ ]
143
+ )
144
+
145
+
146
def _data_used_markdown(manifest: Dict[str, Any]) -> str:
    """Render the "What data is used here" markdown section.

    Reads ``manifest["methodology_summary"]["source_groups"]`` (either level
    may be missing), passes it through ``_split_source_group_lines`` and
    lists each resulting entry as a bullet, followed by a fixed description
    of the published files.
    """
    methodology = manifest.get("methodology_summary") or {}
    group_bullets = [
        f"- {group}"
        for group in _split_source_group_lines(methodology.get("source_groups"))
    ]

    intro = [
        "## What data is used here",
        "",
        "This release uses public records from these source groups:",
        "",
    ]
    file_notes = [
        "",
        "## How those records show up in this release",
        "",
        "- `members.csv`: one row per House member in this slice.",
        "- `scored_events.csv`: row-level overlaps or signals that survived into the public release.",
        "- `graph_links.csv`: relationship-level rows aggregated from the event layer.",
        "- `evidence_audit/*`: source URLs, SHA-256 values, and public-safe provenance rows for verification.",
        "",
        "Not every internal raw record is published here. The public package is a bounded, sanitized release layer.",
    ]
    return "\n".join(intro + group_bullets + file_notes)
167
+
168
+
169
+ def _how_to_use_markdown() -> str:
170
+ return "\n".join(
171
+ [
172
+ "## How to read this",
173
+ "",
174
+ "1. Pick one House member.",
175
+ "2. Start in **Overview** and look at the top sectors or funding recipients.",
176
+ "3. Click a relationship in **Relationship to explain**.",
177
+ "4. Read the evidence breakdown and the coarse evidence window.",
178
+ "5. Use the source URLs and SHA-backed artifacts if you want to verify it yourself.",
179
+ "",
180
+ "The safest way to interpret this release is as a map of documented public-record relationships, not a verdict.",
181
+ ]
182
+ )
183
+
184
+
185
  def _plain_status_label(value: str) -> str:
186
  normalized = str(value or "").strip()
187
  mapping = {
 
874
 
875
  def build_app(copy_path: str | Path):
876
  data = load_release_data(copy_path)
877
+ manifest = data["manifest"]
878
  events = data["events"]
879
  nodes = data["graph_nodes"]
880
  edges = data["graph_edges"]
 
907
 
908
  with gr.Blocks(title=copy_payload.get("title", "Congress Public Records Slice")) as app:
909
  gr.Markdown(copy_payload.get("welcome_markdown", copy_payload.get("landing_markdown", "")))
910
+ with gr.Tab("What This Is"):
911
+ gr.Markdown(_about_release_markdown(manifest, data["recipient_link_quality"], data["source_quality"]))
912
+ gr.Markdown(_data_used_markdown(manifest))
913
+ gr.Markdown(_how_to_use_markdown())
914
  with gr.Tab("Overview"):
915
  gr.Markdown(
916
  "### Start here\n\n"
 
1036
  gr.Dataframe(value=data["artifact_index"].head(200), interactive=False)
1037
  with gr.Tab("Methodology & Limits"):
1038
  gr.Markdown(copy_payload.get("landing_markdown", ""))
1039
+ gr.Markdown(_data_used_markdown(manifest))
1040
  gr.Markdown(copy_payload.get("downloads_markdown", ""))
1041
  with gr.Tab("Downloads"):
1042
  gr.Markdown(copy_payload.get("downloads_markdown", ""))