cjc0013 committed on
Commit
9dbc3ce
·
verified ·
1 Parent(s): 5d1aa1e

Upload 5 files

Browse files
Files changed (5) hide show
  1. README.md +18 -14
  2. app.py +11 -0
  3. public_copy.json +10 -0
  4. public_space_app.py +466 -0
  5. requirements.txt +3 -0
README.md CHANGED
@@ -1,14 +1,18 @@
1
- ---
2
- title: Cmp
3
- emoji: 🏃
4
- colorFrom: indigo
5
- colorTo: pink
6
- sdk: gradio
7
- sdk_version: 6.12.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- short_description: cmp
12
- ---
13
-
14
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
1
+ # Congress Public Records Slice Space
2
+
3
+ Neutral Records explorer for a public-record slice of congressional money-and-power linkages.
4
+
5
+ ## Runtime Notes
6
+
7
+ - This Space reads the sanitized dataset bundle from the configured Hugging Face dataset repo.
8
+ - For local testing, set the `PUBLIC_RELEASE_LOCAL_ROOT` environment variable to a local copy of the dataset root, set `local_dataset_root` in `public_copy.json`, or regenerate the bundle with a local preview root.
9
+ - The Space is intentionally neutral and does not assign guilt, wrongdoing, intent, or causality.
10
+
11
+ ## Required Caveats
12
+
13
+ - This release is a slice of public-record data, not a complete accounting of all potentially relevant data.
14
+ - Future releases may update or expand this slice as source recovery, parsing, and evidence linkage improve.
15
+ - This release does not assign guilt, wrongdoing, intent, or causality to any person or organization.
16
+ - The release shows public-record overlaps, timing, and linkage strength, not proof of illegality or corruption.
17
+ - Some rows remain review-tier or include unresolved official source references and should be read with those labels in mind.
18
+ - The public package includes verification summaries and SHA-backed artifact indexes, but it does not include the full internal raw corpus, so external verification is bounded by what is published here.
app.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from pathlib import Path
4
+
5
+ from public_space_app import build_app
6
+
7
# Anchor the copy file to this script's own directory instead of the
# process working directory.
APP_DIR = Path(__file__).resolve().parent

# The app object is built at import time and bound to a module-level name.
app = build_app(APP_DIR.joinpath("public_copy.json"))

if __name__ == "__main__":
    # Only start the server when this file is executed directly.
    app.launch()
public_copy.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "public_version": "congress-public-records-slice-2026-04-v1",
3
+ "title": "Congress Public Records Slice",
4
+ "subtitle": "Neutral Records explorer for a public-record slice of congressional money-and-power linkages.",
5
+ "dataset_repo_id": "cjc0013/cmp-data",
6
+ "space_repo_id": "cjc0013/cmp",
7
+ "landing_markdown": "# Congress Public Records Slice\n\nA neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.\n\n- This release is a slice of public-record data, not a complete accounting of all potentially relevant data.\n- Future releases may update or expand this slice as source recovery, parsing, and evidence linkage improve.\n- This release does not assign guilt, wrongdoing, intent, or causality to any person or organization.\n- The release shows public-record overlaps, timing, and linkage strength, not proof of illegality or corruption.\n- Some rows remain review-tier or include unresolved official source references and should be read with those labels in mind.\n- The public package includes verification summaries and SHA-backed artifact indexes, but it does not include the full internal raw corpus, so external verification is bounded by what is published here.",
8
+ "downloads_markdown": "## Downloads\n\n- Dataset repo id: `cjc0013/cmp-data`\n- Space repo id: `cjc0013/cmp`\n\nUse the dataset bundle files for direct review, CSV download, and SHA-backed source checks.",
9
+ "dataset_bundle_prefix": "dataset_bundle"
10
+ }
public_space_app.py ADDED
@@ -0,0 +1,466 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import html
4
+ import json
5
+ import os
6
+ import urllib.request
7
+ from pathlib import Path
8
+ from typing import Any, Dict, Tuple
9
+
10
+ import pandas as pd
11
+
12
+ try:
13
+ import gradio as gr
14
+ except ImportError as exc: # pragma: no cover - runtime dependency
15
+ raise RuntimeError("gradio is required to run this Space bundle") from exc
16
+
17
+ try:
18
+ from pyvis.network import Network
19
+ except ImportError as exc: # pragma: no cover - runtime dependency
20
+ raise RuntimeError("pyvis is required to run this Space bundle") from exc
21
+
22
+
23
def _read_json(source: str) -> Dict[str, Any]:
    """Load a JSON document from a local path or an HTTP(S) URL.

    Args:
        source: Filesystem path, or a URL starting with ``http://`` or
            ``https://``.

    Returns:
        The decoded JSON payload.

    Raises:
        OSError: If the file cannot be read or the download fails or times out
            (``urllib.error.URLError`` is an ``OSError`` subclass).
        json.JSONDecodeError: If the content is not valid JSON.
    """
    if source.startswith(("http://", "https://")):
        # Without a timeout a stalled download would block startup forever.
        with urllib.request.urlopen(source, timeout=60) as response:
            return json.loads(response.read().decode("utf-8"))
    return json.loads(Path(source).read_text(encoding="utf-8"))
28
+
29
+
30
def _read_jsonl(source: str) -> pd.DataFrame:
    """Load a JSON Lines document into a DataFrame.

    Args:
        source: Filesystem path, or a URL starting with ``http://`` or
            ``https://``.

    Returns:
        One DataFrame row per non-blank line. A source with no records yields
        an empty DataFrame without columns.

    Raises:
        OSError: If the file cannot be read or the download fails or times out.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    if source.startswith(("http://", "https://")):
        # Without a timeout a stalled download would block startup forever.
        with urllib.request.urlopen(source, timeout=60) as response:
            text = response.read().decode("utf-8")
    else:
        text = Path(source).read_text(encoding="utf-8")
    # Blank lines are skipped rather than treated as malformed records.
    records = [json.loads(line) for line in text.splitlines() if line.strip()]
    return pd.DataFrame(records)
38
+
39
+
40
def _dataset_path(copy_payload: Dict[str, Any], relative_path: str) -> str:
    """Resolve a bundle file to a local path when available, else a Hub URL.

    Local roots are tried in order: the ``PUBLIC_RELEASE_LOCAL_ROOT``
    environment variable, then the ``local_dataset_root`` key of the copy
    payload (the option the README documents for local testing). A root is
    only used when the requested file actually exists beneath it.

    Args:
        copy_payload: Parsed ``public_copy.json`` content.
        relative_path: Path of the file relative to the dataset root.

    Returns:
        A local filesystem path, or a ``resolve/main`` URL on the configured
        Hugging Face dataset repo.

    Raises:
        FileNotFoundError: If no local copy exists and ``dataset_repo_id`` is
            missing or blank.
    """
    root_candidates = (
        os.environ.get("PUBLIC_RELEASE_LOCAL_ROOT", ""),
        copy_payload.get("local_dataset_root") or "",
    )
    for raw_root in root_candidates:
        root_text = str(raw_root).strip()
        if not root_text:
            continue
        local_file = Path(root_text).resolve() / relative_path
        if local_file.exists():
            return str(local_file)

    repo_id = str(copy_payload.get("dataset_repo_id") or "").strip()
    if not repo_id:
        raise FileNotFoundError(f"Dataset repo id is not configured for {relative_path}")
    return f"https://huggingface.co/datasets/{repo_id}/resolve/main/{relative_path}"
49
+
50
+
51
def load_release_data(copy_path: str | Path) -> Dict[str, Any]:
    """Read the copy file and load every dataset bundle artifact it points to.

    Args:
        copy_path: Location of the ``public_copy.json`` style configuration.

    Returns:
        A dict holding the parsed copy payload under ``"copy"`` plus one entry
        per bundle file (parsed JSON dicts or DataFrames).
    """
    copy_payload = json.loads(Path(copy_path).read_text(encoding="utf-8"))
    bundle_root = copy_payload.get("dataset_bundle_prefix", "dataset_bundle")

    # (result key, loader, file name inside the bundle). The tuple order is
    # both the load order and the key order of the returned dict.
    bundle_files = (
        ("manifest", _read_json, "public_release_manifest.json"),
        ("members", pd.read_csv, "members.csv"),
        ("events", pd.read_csv, "scored_events.csv"),
        ("links", pd.read_csv, "graph_links.csv"),
        ("recipient_link_quality", _read_json, "recipient_link_quality_report.json"),
        ("source_quality", _read_json, "source_quality_report.json"),
        ("provenance_coverage", _read_json, "provenance_coverage_report.json"),
        ("graph_nodes", pd.read_csv, "network_graph/nodes.csv"),
        ("graph_edges", pd.read_csv, "network_graph/edges.csv"),
        ("graph_config", _read_json, "network_graph/graph_config.json"),
        ("artifact_index", pd.read_csv, "evidence_audit/source_artifact_index.csv"),
        ("event_audit", pd.read_csv, "evidence_audit/scored_event_index.csv"),
        ("event_provenance", _read_jsonl, "evidence_audit/scored_event_provenance.jsonl"),
        ("consistency", _read_json, "evidence_audit/consistency_report.json"),
    )

    release: Dict[str, Any] = {"copy": copy_payload}
    for key, loader, file_name in bundle_files:
        release[key] = loader(_dataset_path(copy_payload, f"{bundle_root}/{file_name}"))
    return release
73
+
74
+
75
def _member_search_mask(frame: pd.DataFrame, query: str) -> pd.Series:
    """Build a boolean row mask for a free-text member search.

    A row matches when the query occurs, case-insensitively, in its
    ``member_name`` or ``member_slug`` value. Columns missing from ``frame``
    simply never match.

    Args:
        frame: Table to search.
        query: User-typed text; surrounding whitespace is ignored.

    Returns:
        A boolean Series aligned to ``frame.index``. A blank query selects
        every row.
    """
    needle = query.strip()
    if not needle:
        return pd.Series(True, index=frame.index, dtype=bool)

    mask = pd.Series(False, index=frame.index, dtype=bool)
    for column in ("member_name", "member_slug"):
        if column not in frame.columns:
            continue
        values = frame[column].fillna("").astype(str)
        # regex=False: the query is literal user text, so characters such as
        # "(" or "+" must not be parsed as a regular expression.
        mask |= values.str.contains(needle, case=False, na=False, regex=False)
    return mask
81
+
82
+
83
def _plain_status_label(value: str) -> str:
    """Translate a relationship status code into a reader-facing label.

    Codes without a dedicated label are title-cased with underscores turned
    into spaces; a blank code becomes ``"Unknown"``.
    """
    code = str(value or "").strip()
    known_labels = {
        "release_ok": "Stronger support",
        "linked": "Stronger support",
        "needs_review": "Needs review / caution",
        "acceptable_with_label": "Usable with caveats",
        "unresolved": "Unresolved",
        "stronger": "Stronger support",
        "all": "All shown relationships",
    }
    if code in known_labels:
        return known_labels[code]
    readable = code.replace("_", " ").title()
    return readable if readable else "Unknown"
95
+
96
+
97
def _plain_status_explainer(value: str) -> str:
    """Return a one-sentence explanation for a relationship status code.

    Codes without a dedicated sentence get a generic reminder to read the
    relationship together with its evidence and caveats.
    """
    code = str(value or "").strip()
    stronger_text = "The released slice has clearer public support for this relationship."
    explanations = {
        "release_ok": stronger_text,
        "linked": stronger_text,
        "needs_review": "There is some support for this relationship, but it should be read with caution.",
        "acceptable_with_label": "This relationship is usable in the release, but some caveats remain visible.",
        "unresolved": "The released slice does not yet have enough public support to present this relationship as stronger.",
    }
    if code in explanations:
        return explanations[code]
    return "This relationship should be interpreted together with the attached evidence and caveats."
107
+
108
+
109
def _plain_family_label(value: str) -> str:
    """Translate a relationship family code into a reader-facing label.

    Unlisted codes are title-cased with underscores turned into spaces; a
    blank code becomes ``"Relationships"``.
    """
    code = str(value or "").strip()
    known_labels = {
        "recipient": "Funding recipients",
        "sector": "Sectors",
        "all": "All relationships",
    }
    label = known_labels.get(code)
    if label is not None:
        return label
    readable = code.replace("_", " ").title()
    return readable if readable else "Relationships"
116
+
117
+
118
def _plain_score_label(value: str) -> str:
    """Translate a score label code into a reader-facing label.

    Unlisted codes are title-cased with underscores turned into spaces; a
    blank code becomes ``"Score label"``.
    """
    code = str(value or "").strip()
    known_labels = {
        "strong_sector_overlap": "Stronger sector overlap",
        "weak_sector_overlap": "Weaker sector overlap",
        "all": "All score labels",
    }
    label = known_labels.get(code)
    if label is not None:
        return label
    readable = code.replace("_", " ").title()
    return readable if readable else "Score label"
125
+
126
+
127
def _trim_to_overview_members(edges: pd.DataFrame, max_members: int) -> pd.DataFrame:
    """Keep only the edges of the members with the most links.

    Members are ranked by summed ``link_count``, then by number of edges, then
    alphabetically by name. The input is returned unchanged when it is empty,
    when ``max_members`` is not positive, or when no usable slug is selected.
    """
    if edges.empty or max_members <= 0:
        return edges

    grouped = edges.groupby(["member_slug", "member_name"], dropna=False)
    totals = grouped.agg(
        total_link_count=("link_count", "sum"),
        edge_count=("edge_id", "count"),
    ).reset_index()
    ranked = totals.sort_values(
        ["total_link_count", "edge_count", "member_name"],
        ascending=[False, False, True],
    )

    keep_slugs = set()
    for slug in ranked["member_slug"].head(max_members).tolist():
        slug_text = str(slug)
        if slug_text.strip():
            keep_slugs.add(slug_text)

    if not keep_slugs:
        return edges
    selected = edges["member_slug"].isin(keep_slugs)
    return edges[selected]
144
+
145
+
146
def _graph_intro_markdown(config: Dict[str, Any]) -> str:
    """Compose the introductory Markdown shown above the network graph.

    The text explains the colour legend, the default filters and the node,
    edge and status counts, all read from the graph configuration.
    """

    def tally(section: str, key: str) -> int:
        # Missing sections, missing keys and null values all count as zero.
        bucket = config.get(section) or {}
        return int(bucket.get(key, 0) or 0)

    defaults = config.get("default_filters") or {}
    family_text = _plain_family_label(str(defaults.get("relationship_family", "sector"))).lower()
    status_text = _plain_status_label(str(defaults.get("review_status", "stronger"))).lower()
    hidden_text = str(bool(defaults.get("hide_unresolved_only", True))).lower()
    searches = [
        str(item)
        for item in (config.get("example_member_searches") or [])
        if str(item).strip()
    ]

    bullets = [
        "### What you are looking at",
        "",
        "- Green dots are House members, rust dots are funding recipients, and gold dots are sectors.",
        "- Thicker lines mean more supporting relationship rows in this released slice.",
        f"- This graph opens in a simpler `{family_text}` overview so the first screen is easier to read.",
        f"- The default status filter is `{status_text}`.",
        f"- Unresolved-only edges start hidden: `{hidden_text}`.",
    ]
    if searches:
        quoted = ", ".join(f"`{item}`" for item in searches)
        bullets.append(f"- Example member searches: {quoted}.")

    members = tally("node_counts", "member")
    recipients = tally("node_counts", "recipient")
    sectors = tally("node_counts", "sector")
    recipient_edges = tally("edge_counts", "recipient")
    sector_edges = tally("edge_counts", "sector")
    stronger = tally("relationship_status_counts", "linked") + tally("relationship_status_counts", "release_ok")
    needs_review = tally("relationship_status_counts", "needs_review")
    unresolved = tally("relationship_status_counts", "unresolved")
    bullets.extend(
        [
            f"- Current graph inventory: `{members}` members, `{recipients}` recipients, `{sectors}` sectors.",
            f"- Relationship counts: `{recipient_edges}` recipient edges, `{sector_edges}` sector edges.",
            f"- Stronger-support relationships in this slice: `{stronger}`.",
            f"- Needs-review relationships in this slice: `{needs_review}`.",
            f"- Unresolved relationships in this slice: `{unresolved}`.",
        ]
    )
    return "\n".join(bullets)
169
+
170
+
171
def _filter_events(events: pd.DataFrame, member_query: str, event_type: str, score_label: str, text_query: str) -> pd.DataFrame:
    """Apply the Explore tab filters to the scored events table.

    Args:
        events: Full events table; it is never modified.
        member_query: Free-text member search; blank disables the filter.
        event_type: Exact ``event_type`` to keep, or ``"all"``.
        score_label: Exact ``score_label`` to keep, or ``"all"``.
        text_query: Free text matched, case-insensitively, against
            ``issuer_raw`` and ``sector``; blank disables the filter.

    Returns:
        A new DataFrame holding the matching rows.
    """
    filtered = events.copy()
    if member_query.strip():
        filtered = filtered[_member_search_mask(filtered, member_query)]
    if event_type != "all":
        filtered = filtered[filtered["event_type"] == event_type]
    if score_label != "all":
        filtered = filtered[filtered["score_label"] == score_label]

    needle = text_query.strip()
    if needle:
        # regex=False: the query is literal user text, so input such as "("
        # or "c++" must not be parsed as a regular expression.
        issuer_hit = filtered["issuer_raw"].fillna("").astype(str).str.contains(needle, case=False, na=False, regex=False)
        sector_hit = filtered["sector"].fillna("").astype(str).str.contains(needle, case=False, na=False, regex=False)
        filtered = filtered[issuer_hit | sector_hit]
    return filtered
184
+
185
+
186
def _filter_graph(
    edges: pd.DataFrame,
    family: str,
    member_query: str,
    target_query: str,
    score_label: str,
    review_status: str,
    hide_unresolved_only: bool,
    max_edges: int,
    overview_member_limit: int,
) -> pd.DataFrame:
    """Apply the Network Graph tab filters to the edge table.

    Args:
        edges: Full edge table; it is never modified.
        family: ``relationship_family`` to keep, or ``"all"``.
        member_query: Free-text member search; blank disables the filter.
        target_query: Free text matched, case-insensitively, against
            ``target_label``; blank disables the filter.
        score_label: Text that must occur in ``score_labels``, or ``"all"``.
        review_status: ``"stronger"`` keeps ``linked`` and ``release_ok`` rows,
            ``"all"`` keeps everything, any other value must equal
            ``relationship_status``.
        hide_unresolved_only: Drop rows whose status is ``unresolved``.
        max_edges: Maximum number of rows returned (coerced with ``int``).
        overview_member_limit: Member cap used when neither search box is
            filled in.

    Returns:
        The matching rows, strongest first, truncated to ``max_edges``.
    """
    member_needle = member_query.strip()
    target_needle = target_query.strip()

    filtered = edges.copy()
    if family != "all":
        filtered = filtered[filtered["relationship_family"] == family]
    if member_needle:
        filtered = filtered[_member_search_mask(filtered, member_query)]
    if target_needle:
        # regex=False here and below: filter text is literal, so characters
        # such as "(" must not be parsed as a regular expression.
        labels = filtered["target_label"].fillna("").astype(str)
        filtered = filtered[labels.str.contains(target_needle, case=False, na=False, regex=False)]
    if score_label != "all":
        scores = filtered["score_labels"].fillna("").astype(str)
        filtered = filtered[scores.str.contains(score_label, case=False, na=False, regex=False)]
    if review_status == "stronger":
        filtered = filtered[filtered["relationship_status"].isin(["linked", "release_ok"])]
    elif review_status != "all":
        filtered = filtered[filtered["relationship_status"] == review_status]
    if hide_unresolved_only:
        filtered = filtered[filtered["relationship_status"] != "unresolved"]

    filtered = filtered.sort_values(
        ["link_count", "strong_event_count", "linked_count"],
        ascending=[False, False, False],
    )
    if not member_needle and not target_needle:
        # The overview trim only drops rows and keeps their order, so the
        # frame does not need to be sorted a second time afterwards.
        filtered = _trim_to_overview_members(filtered, int(overview_member_limit))
    return filtered.head(int(max_edges))
217
+
218
+
219
def _split_pipe_values(value: Any, *, limit: int | None = None) -> list[str]:
    """Split a ``" | "``-separated cell into its trimmed, non-empty items.

    Args:
        value: Raw cell value. ``None``, NaN and other falsy values yield an
            empty list.
        limit: When given, return at most this many leading items.

    Returns:
        The items in their original order.
    """
    # A missing CSV cell is read as float NaN, which is truthy and would
    # otherwise be rendered as the literal item "nan".
    if isinstance(value, float) and value != value:
        return []
    items = [part.strip() for part in str(value or "").split(" | ") if part.strip()]
    return items if limit is None else items[:limit]
224
+
225
+
226
def _consistency_summary_markdown(consistency: Dict[str, Any]) -> str:
    """Render the consistency report as the Markdown shown on the Audit tab.

    Counts are read from the ``event_provenance`` and
    ``claim_supporting_provenance`` sections; missing or null counts show as
    ``0``.
    """
    event_stats = consistency.get("event_provenance") or {}
    claim_stats = consistency.get("claim_supporting_provenance") or {}

    # (caption, section the count lives in, key of the count)
    summary_rows = (
        ("Event rows in the audit index", event_stats, "event_count"),
        ("Event rows with attached artifacts", event_stats, "events_with_artifacts"),
        ("Stored-versus-lookup provenance mismatches", event_stats, "stored_lookup_mismatch_count"),
        ("Claim-supporting rows in the audit index", claim_stats, "row_count"),
        ("Claim-supporting rows with attached artifacts", claim_stats, "rows_with_artifacts"),
    )

    out = ["### Audit Summary", ""]
    for caption, section, key in summary_rows:
        out.append(f"- {caption}: `{int(section.get(key, 0) or 0)}`")
    out.append("")
    out.append("Use the tables below to inspect the public source URLs and SHA-backed artifacts that support the released rows.")
    return "\n".join(out)
242
+
243
+
244
def _render_graph(nodes: pd.DataFrame, edges: pd.DataFrame) -> str:
    """Render the filtered relationship graph as pyvis HTML.

    Args:
        nodes: Node table with a ``node_id`` column plus display attributes.
        edges: Edge table; every row becomes one edge when both of its
            endpoints are present in ``nodes``.

    Returns:
        The HTML produced by pyvis, or a short placeholder ``<div>`` when
        ``edges`` is empty.
    """
    if edges.empty:
        return "<div style=\"padding: 1rem; border: 1px solid #d6d0c4; background: #fffdf8; color: #3a3a3a;\">No relationships match the current filters.</div>"

    def _text(record: Dict[str, Any], key: str) -> str:
        # Missing CSV cells arrive as float NaN, which is truthy and would
        # otherwise be shown as the literal text "nan".
        raw = record.get(key)
        if raw is None or (isinstance(raw, float) and raw != raw):
            return ""
        return str(raw)

    def _count(record: Dict[str, Any], key: str, default: int = 0) -> int:
        # int() raises on NaN, infinity, None and blank strings; all of them
        # fall back to the default instead of aborting the render.
        try:
            return int(float(record.get(key, default)))
        except (TypeError, ValueError, OverflowError):
            return default

    network = Network(height="720px", width="100%", bgcolor="#fbf7ee", font_color="#1f2b2d")
    network.barnes_hut(gravity=-15000, central_gravity=0.15, spring_length=220, spring_strength=0.02)
    network.set_options("""
    var options = {
      "interaction": {"hover": true, "tooltipDelay": 120, "navigationButtons": true, "keyboard": true},
      "physics": {"stabilization": {"enabled": true, "iterations": 250}}
    }
    """)
    color_map = {"member": "#1f5f5b", "recipient": "#a24e2c", "sector": "#c08d2e"}
    role_map = {"member": "House member", "recipient": "Funding recipient", "sector": "Sector"}
    edge_style_map = {
        "linked": {"color": "#2f7d4a", "dashes": False},
        "release_ok": {"color": "#2f7d4a", "dashes": False},
        "needs_review": {"color": "#c67f00", "dashes": True},
        "acceptable_with_label": {"color": "#b68b2a", "dashes": True},
        "unresolved": {"color": "#9aa0a6", "dashes": True},
    }

    # Duplicate node ids would make to_dict("index") raise; keep the first.
    node_rows = nodes.drop_duplicates(subset="node_id").set_index("node_id").to_dict("index")
    endpoint_ids = set(edges["source_node_id"]).union(edges["target_node_id"])
    drawn_ids = set()
    # Sorting makes the generated HTML independent of set iteration order.
    for node_id in sorted(endpoint_ids, key=str):
        node = node_rows.get(node_id)
        if node is None:
            continue
        node_type = _text(node, "node_type")
        label = _text(node, "label")
        degree = _count(node, "connected_edge_count")
        title_lines = [
            f"<b>{html.escape(label)}</b>",
            f"Role: {html.escape(role_map.get(node_type, node_type.title()))}",
        ]
        if node_type == "member":
            party = _text(node, "party").strip()
            state = _text(node, "state").strip()
            if party or state:
                title_lines.append(f"Party / State: {html.escape(' '.join(item for item in [party, state] if item))}")
        title_lines.append(f"Released relationships in graph data: {degree}")
        # Nodes are registered under str ids because edges are added with
        # str ids below; the two must agree for pyvis to find the endpoints.
        network.add_node(
            str(node_id),
            label=label,
            title="<br>".join(title_lines),
            color=color_map.get(node_type, "#6e6e6e"),
            shape="dot",
            size=16 + min(degree, 20),
        )
        drawn_ids.add(str(node_id))

    for row in edges.to_dict("records"):
        source_id = str(row.get("source_node_id", ""))
        target_id = str(row.get("target_node_id", ""))
        if source_id not in drawn_ids or target_id not in drawn_ids:
            # pyvis rejects edges whose endpoints were never added, so an edge
            # pointing at a node absent from the node table is skipped.
            continue
        status = _text(row, "relationship_status")
        family = _text(row, "relationship_family")
        source_urls = [item for item in _text(row, "source_urls").split(" | ") if item]
        score_labels = [
            _plain_score_label(part.split(":", 1)[0])
            for part in _text(row, "score_labels").split(" | ")
            if ":" in part
        ]
        member_text = _text(row, "member_name") or _text(row, "member_slug")
        title_lines = [
            f"<b>{html.escape(member_text)} -> {html.escape(_text(row, 'target_label'))}</b>",
            f"Relationship type: {html.escape(_plain_family_label(family))}",
            f"Presentation tier: {html.escape(_plain_status_label(status))}",
            html.escape(_plain_status_explainer(status)),
            f"Supporting relationship rows in this slice: {_count(row, 'link_count')}",
        ]
        if family.strip() == "recipient":
            title_lines.append(f"Stronger-support rows: {_count(row, 'linked_count')}")
            title_lines.append(f"Needs-review rows: {_count(row, 'review_count')}")
        else:
            title_lines.append(f"Stronger sector-overlap events: {_count(row, 'strong_event_count')}")
            title_lines.append(f"Weaker sector-overlap events: {_count(row, 'weak_event_count')}")
        if score_labels:
            title_lines.append(f"Score labels: {html.escape(', '.join(score_labels[:4]))}")
        unresolved_count = _count(row, "unresolved_source_ref_count")
        if unresolved_count:
            title_lines.append(f"Unresolved source references still counted: {unresolved_count}")
        if source_urls:
            title_lines.append("Example source URLs:")
            title_lines.append("<br>".join(html.escape(item) for item in source_urls[:3]))
        else:
            title_lines.append("No public URLs attached in this edge summary.")
        edge_style = edge_style_map.get(status, {"color": "#7b7b7b", "dashes": False})
        weight = max(_count(row, "link_count", 1), 1)
        network.add_edge(
            source_id,
            target_id,
            value=weight,
            width=1 + min(weight, 8),
            title="<br>".join(title_lines),
            color=edge_style["color"],
            dashes=edge_style["dashes"],
        )
    # NOTE(review): the caller is expected to display this HTML as-is;
    # confirm the consuming component runs the scripts embedded in it.
    return network.generate_html(notebook=False)
332
+
333
+
334
def _event_detail(events: pd.DataFrame, provenance: pd.DataFrame, event_id: str) -> Tuple[str, pd.DataFrame]:
    """Build the Markdown summary and provenance table for one event.

    Args:
        events: Scored events table, looked up by ``event_id``.
        provenance: Provenance rows, matched on ``row_key``.
        event_id: Identifier of the event to describe.

    Returns:
        ``(markdown, provenance_rows)``. When ``event_id`` is blank or not
        found, a prompt to pick an event and an empty DataFrame are returned.
    """
    placeholder = "Select an event id to inspect source URLs and SHA-backed artifacts."
    if not event_id or "event_id" not in events.columns:
        return placeholder, pd.DataFrame()
    matches = events[events["event_id"] == event_id]
    if matches.empty:
        return placeholder, pd.DataFrame()
    event_row = matches.head(1).to_dict("records")[0]

    # An empty provenance file loads as a frame without any columns.
    if "row_key" in provenance.columns:
        prov_rows = provenance[provenance["row_key"] == event_id]
    else:
        prov_rows = pd.DataFrame()

    def _text(key: str) -> str:
        # Missing CSV cells arrive as float NaN, which is truthy and would
        # otherwise be shown as the literal text "nan".
        raw = event_row.get(key)
        if raw is None or (isinstance(raw, float) and raw != raw):
            return ""
        return str(raw).strip()

    def _count(key: str) -> int:
        # int() raises on NaN, infinity, None and blank strings; treat those
        # as zero instead of failing the whole panel.
        try:
            return int(float(event_row.get(key, 0)))
        except (TypeError, ValueError, OverflowError):
            return 0

    member_name = _text("member_name") or _text("member_slug") or "Unknown member"
    event_type = _text("event_type").replace("_", " ").strip() or "unspecified event"
    raw_score_label = _text("score_label")
    issuer_raw = _text("issuer_raw")
    sector = _text("sector")
    confidence_bucket = _text("confidence_bucket")
    reason_codes = _split_pipe_values(_text("reason_codes"))
    missing_to_strengthen = _split_pipe_values(_text("missing_to_strengthen"))
    source_urls = _split_pipe_values(_text("source_urls"), limit=5)
    sha_values = _split_pipe_values(_text("sha256_values"), limit=5)

    lines = [
        f"### {member_name}",
        "",
        "This panel summarizes one released event row from the public slice.",
        "",
        f"- Event id: `{event_id}`",
        f"- Event type: `{event_type}`",
    ]
    # Test the raw value: the label helper never returns an empty string, so
    # testing its result would print a placeholder label for unscored rows.
    if raw_score_label:
        lines.append(f"- Score label: `{_plain_score_label(raw_score_label)}`")
    if confidence_bucket:
        lines.append(f"- Confidence bucket: `{confidence_bucket}`")
    if issuer_raw:
        lines.append(f"- Issuer or subject: `{issuer_raw}`")
    if sector:
        lines.append(f"- Sector: `{sector}`")
    lines.extend(
        [
            f"- Attached source URLs in this row: `{_count('source_ref_count')}`",
            f"- SHA-backed artifacts attached: `{_count('sha_backed_source_artifact_count')}`",
            f"- Unresolved source references still counted: `{_count('unresolved_source_ref_count')}`",
            f"- Matching provenance rows shown below: `{len(prov_rows)}`",
        ]
    )
    if reason_codes:
        lines.extend(["", "#### Why this row appears", ""])
        lines.extend(f"- `{item}`" for item in reason_codes[:8])
    if missing_to_strengthen:
        lines.extend(["", "#### What would strengthen it", ""])
        lines.extend(f"- `{item}`" for item in missing_to_strengthen[:8])
    if source_urls:
        lines.extend(["", "#### Example source URLs", ""])
        lines.extend(f"- {item}" for item in source_urls)
    if sha_values:
        lines.extend(["", "#### Example SHA-256 values", ""])
        lines.extend(f"- `{item}`" for item in sha_values)
    return "\n".join(lines), prov_rows
386
+
387
+
388
def build_app(copy_path: str | Path):
    """Assemble the Gradio Blocks application for the public release.

    Args:
        copy_path: Location of the ``public_copy.json`` style configuration;
            it names the dataset bundle that is loaded up front.

    Returns:
        The ``gr.Blocks`` app with the Explore, Network Graph, Event Detail,
        Audit, Methodology & Limits and Downloads tabs.
    """
    data = load_release_data(copy_path)
    events = data["events"]
    nodes = data["graph_nodes"]
    edges = data["graph_edges"]
    provenance = data["event_provenance"]
    copy_payload = data["copy"]
    graph_config = data["graph_config"]

    event_type_choices = ["all"] + sorted(events["event_type"].dropna().unique().tolist())
    score_label_choices = ["all"] + sorted(events["score_label"].dropna().unique().tolist())
    graph_score_choices = [("All score labels", "all")] + [
        (_plain_score_label(value), value)
        for value in sorted(graph_config.get("available_score_labels") or [])
    ]
    graph_status_choices = [
        ("All shown relationships", "all"),
        ("Stronger support", "stronger"),
        ("Needs review / caution", "needs_review"),
        ("Usable with caveats", "acceptable_with_label"),
        ("Unresolved", "unresolved"),
    ]
    graph_family_choices = [
        ("Sectors", "sector"),
        ("Funding recipients", "recipient"),
        ("All relationships", "all"),
    ]
    event_id_choices = sorted(events["event_id"].dropna().unique().tolist())
    graph_defaults = graph_config.get("default_filters") or {}

    def _choice_or(raw_value: Any, choices: list, fallback: str) -> str:
        # A configured default that is not offered by the dropdown would be an
        # invalid initial value, so fall back to a known choice.
        candidate = str(raw_value)
        return candidate if candidate in {value for _, value in choices} else fallback

    default_family = _choice_or(graph_defaults.get("relationship_family", "sector"), graph_family_choices, "sector")
    default_status = _choice_or(graph_defaults.get("review_status", "stronger"), graph_status_choices, "stronger")
    # Explicit nulls in the config fall back to the same defaults as missing keys.
    raw_member_limit = graph_defaults.get("overview_member_limit")
    overview_member_limit = 8 if raw_member_limit is None else int(raw_member_limit)
    raw_max_edges = graph_defaults.get("max_edges")
    # Keep the initial slider value inside the slider's own 25-300 range.
    default_max_edges = min(max(60 if raw_max_edges is None else int(raw_max_edges), 25), 300)

    with gr.Blocks(title=copy_payload.get("title", "Congress Public Records Slice")) as app:
        gr.Markdown(copy_payload.get("landing_markdown", ""))
        with gr.Tab("Explore"):
            with gr.Row():
                member_query = gr.Textbox(label="Member name or slug")
                event_type = gr.Dropdown(label="Event type", choices=event_type_choices, value="all")
                score_label = gr.Dropdown(label="Score label", choices=score_label_choices, value="all")
                text_query = gr.Textbox(label="Issuer or sector search")
            explore_df = gr.Dataframe(value=events.head(100), interactive=False)

            def _update_events(member_query: str, event_type: str, score_label: str, text_query: str):
                return _filter_events(events, member_query, event_type, score_label, text_query)

            explore_inputs = [member_query, event_type, score_label, text_query]
            for control in explore_inputs:
                control.change(_update_events, explore_inputs, explore_df)
        with gr.Tab("Network Graph"):
            gr.Markdown(_graph_intro_markdown(graph_config))
            with gr.Row():
                family = gr.Dropdown(label="Relationship view", choices=graph_family_choices, value=default_family)
                member_graph_query = gr.Textbox(label="Member name or slug")
                target_query = gr.Textbox(label="Recipient or sector search")
                graph_score = gr.Dropdown(label="Score label", choices=graph_score_choices, value="all")
                review_status = gr.Dropdown(label="Relationship strength", choices=graph_status_choices, value=default_status)
            with gr.Row():
                hide_unresolved_only = gr.Checkbox(label="Hide unresolved relationships", value=bool(graph_defaults.get("hide_unresolved_only", True)))
                max_edges = gr.Slider(label="Max visible relationships", minimum=25, maximum=300, step=25, value=default_max_edges)
            graph_html = gr.HTML()
            graph_df = gr.Dataframe(interactive=False)

            def _update_graph(family: str, member_graph_query: str, target_query: str, graph_score: str, review_status: str, hide_unresolved_only: bool, max_edges: int):
                filtered_edges = _filter_graph(edges, family, member_graph_query, target_query, graph_score, review_status, hide_unresolved_only, max_edges, overview_member_limit)
                endpoint_ids = set(filtered_edges["source_node_id"]).union(filtered_edges["target_node_id"])
                filtered_nodes = nodes[nodes["node_id"].isin(endpoint_ids)]
                return _render_graph(filtered_nodes, filtered_edges), filtered_edges

            graph_inputs = [family, member_graph_query, target_query, graph_score, review_status, hide_unresolved_only, max_edges]
            graph_outputs = [graph_html, graph_df]
            for control in graph_inputs:
                control.change(_update_graph, graph_inputs, graph_outputs)
            # Draw the default view as soon as the page opens.
            app.load(_update_graph, graph_inputs, graph_outputs)
        with gr.Tab("Event Detail"):
            event_id = gr.Dropdown(label="Event id", choices=event_id_choices, value=event_id_choices[0] if event_id_choices else None)
            event_detail_md = gr.Markdown()
            event_detail_df = gr.Dataframe(interactive=False)

            def _show_event_detail(event_id: str):
                # The tables are read-only, so the handler closes over them the
                # same way the other handlers do instead of wrapping each one
                # in a gr.State component.
                return _event_detail(events, provenance, event_id)

            event_outputs = [event_detail_md, event_detail_df]
            event_id.change(_show_event_detail, [event_id], event_outputs)
            app.load(_show_event_detail, [event_id], event_outputs)
        with gr.Tab("Audit"):
            gr.Markdown(_consistency_summary_markdown(data["consistency"]))
            gr.Dataframe(value=data["artifact_index"].head(200), interactive=False)
        with gr.Tab("Methodology & Limits"):
            gr.Markdown(copy_payload.get("landing_markdown", ""))
            gr.Markdown(copy_payload.get("downloads_markdown", ""))
        with gr.Tab("Downloads"):
            gr.Markdown(copy_payload.get("downloads_markdown", ""))
    return app
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio>=4.44.0
2
+ pandas>=2.2.0
3
+ pyvis>=0.3.2