Spaces:

cjc0013
/

cmp

Running

App Files Files Community

cjc0013 committed on Apr 19

Commit

7705bfb

verified ·

1 Parent(s): 86f2170

Add plain-English what-this-is framing to Space

Browse files

Files changed (2) hide show

public_copy.json +1 -1
public_space_app.py +102 -0

public_copy.json CHANGED Viewed

@@ -4,7 +4,7 @@
   "subtitle": "Neutral Records explorer for a public-record slice of congressional money-and-power linkages.",
   "dataset_repo_id": "cjc0013/cmp-data",
   "space_repo_id": "cjc0013/cmp",
-  "welcome_markdown": "# Congress Public Records Slice\n\nStart with **Overview** for the clearest read.\n\n- Pick one House member first.\n- Use **Overview** to see the strongest sectors or funding recipients for that member.\n- Use **Explain Link** to see why one relationship appears in this released slice.\n- Use **Explore Graph** only if you want a secondary visual map.\n\nThis is an exploration tool, not an accusation tool.",
   "landing_markdown": "# Congress Public Records Slice\n\nA neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.\n\n- This release is a slice of public-record data, not a complete accounting of all potentially relevant data.\n- Future releases may update or expand this slice as source recovery, parsing, and evidence linkage improve.\n- This release does not assign guilt, wrongdoing, intent, or causality to any person or organization.\n- The release shows public-record overlaps, timing, and linkage strength, not proof of illegality or corruption.\n- Some rows remain review-tier or include unresolved official source references and should be read with those labels in mind.\n- The public package includes verification summaries and SHA-backed artifact indexes, but it does not include the full internal raw corpus, so external verification is bounded by what is published here.",
   "downloads_markdown": "## Downloads\n\n- Dataset repo id: `cjc0013/cmp-data`\n- Space repo id: `cjc0013/cmp`\n\nUse the dataset bundle files for direct review, CSV download, and SHA-backed source checks.",
   "dataset_bundle_prefix": "dataset_bundle"

   "subtitle": "Neutral Records explorer for a public-record slice of congressional money-and-power linkages.",
   "dataset_repo_id": "cjc0013/cmp-data",
   "space_repo_id": "cjc0013/cmp",
+  "welcome_markdown": "# Congress Public Records Slice\n\nStart with **What This Is**, then use **Overview**.\n\n- **What This Is** explains, in plain English, what this release contains and what it does not claim.\n- Pick one House member first.\n- Use **Overview** to see the strongest sectors or funding recipients for that member.\n- Use **Explain Link** to see why one relationship appears in this released slice.\n- Use **Explore Graph** only if you want a secondary visual map.\n\nThis is an exploration tool, not an accusation tool.",
   "landing_markdown": "# Congress Public Records Slice\n\nA neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.\n\n- This release is a slice of public-record data, not a complete accounting of all potentially relevant data.\n- Future releases may update or expand this slice as source recovery, parsing, and evidence linkage improve.\n- This release does not assign guilt, wrongdoing, intent, or causality to any person or organization.\n- The release shows public-record overlaps, timing, and linkage strength, not proof of illegality or corruption.\n- Some rows remain review-tier or include unresolved official source references and should be read with those labels in mind.\n- The public package includes verification summaries and SHA-backed artifact indexes, but it does not include the full internal raw corpus, so external verification is bounded by what is published here.",
   "downloads_markdown": "## Downloads\n\n- Dataset repo id: `cjc0013/cmp-data`\n- Space repo id: `cjc0013/cmp`\n\nUse the dataset bundle files for direct review, CSV download, and SHA-backed source checks.",
   "dataset_bundle_prefix": "dataset_bundle"

public_space_app.py CHANGED Viewed

@@ -86,6 +86,102 @@ def _member_search_mask(frame: pd.DataFrame, query: str) -> pd.Series:
     return name_series.str.contains(query, case=False, na=False) | slug_series.str.contains(query, case=False, na=False)
 def _plain_status_label(value: str) -> str:
     normalized = str(value or "").strip()
     mapping = {
@@ -778,6 +874,7 @@ def _event_detail(events: pd.DataFrame, provenance: pd.DataFrame, event_id: str)
 def build_app(copy_path: str | Path):
     data = load_release_data(copy_path)
     events = data["events"]
     nodes = data["graph_nodes"]
     edges = data["graph_edges"]
@@ -810,6 +907,10 @@ def build_app(copy_path: str | Path):
     with gr.Blocks(title=copy_payload.get("title", "Congress Public Records Slice")) as app:
         gr.Markdown(copy_payload.get("welcome_markdown", copy_payload.get("landing_markdown", "")))
         with gr.Tab("Overview"):
             gr.Markdown(
                 "### Start here\n\n"
@@ -935,6 +1036,7 @@ def build_app(copy_path: str | Path):
             gr.Dataframe(value=data["artifact_index"].head(200), interactive=False)
         with gr.Tab("Methodology & Limits"):
             gr.Markdown(copy_payload.get("landing_markdown", ""))
             gr.Markdown(copy_payload.get("downloads_markdown", ""))
         with gr.Tab("Downloads"):
             gr.Markdown(copy_payload.get("downloads_markdown", ""))

     return name_series.str.contains(query, case=False, na=False) | slug_series.str.contains(query, case=False, na=False)
+def _split_source_group_lines(text: Any) -> list[str]:
+    lines = []
+    for raw_line in str(text or "").splitlines():
+        cleaned = raw_line.strip().lstrip("-").strip()
+        if cleaned:
+            lines.append(cleaned)
+    if "USAspending award pages used for some recipient matching" not in lines:
+        lines.append("USAspending award pages used for some recipient matching")
+    return lines
+def _about_release_markdown(
+    manifest: Dict[str, Any],
+    recipient_link_quality: Dict[str, Any],
+    source_quality: Dict[str, Any],
+) -> str:
+    counts = manifest.get("counts") or {}
+    caveats = manifest.get("caveats") or []
+    label_counts = recipient_link_quality.get("label_counts") or {}
+    return "\n".join(
+        [
+            "## What this is",
+            "",
+            "This tool is a public-records explorer for one released slice of House data.",
+            "",
+            "It brings together records about House members, financial disclosures, legislative activity, lobbying visibility, and some community project funding recipient relationships.",
+            "",
+            "The goal is simple: help you inspect where public records overlap, then click through to the published source URLs and SHA-backed artifacts.",
+            "",
+            "It does **not** accuse anyone of a crime, corruption, or wrongdoing.",
+            "It does **not** prove intent or causality.",
+            "It does **not** claim this is the full universe of relevant data.",
+            "",
+            "## What is in this release",
+            "",
+            f"- House members in this slice: `{int(counts.get('members', 0) or 0)}`",
+            f"- Released scored event rows: `{int(counts.get('scored_events', 0) or 0)}`",
+            f"- Released relationship rows: `{int(counts.get('graph_links', 0) or 0)}`",
+            f"- Public source artifacts in the audit index: `{int(counts.get('source_artifacts', 0) or 0)}`",
+            "",
+            "## What the app views mean",
+            "",
+            "- **Overview**: ranked sectors or funding recipients for one House member at a time.",
+            "- **Explain Link**: plain-English reasons and a coarse evidence window for one selected relationship.",
+            "- **Explore Graph**: optional visual map if you want to explore relationships spatially.",
+            "- **Search Events**: raw released event rows for deeper inspection.",
+            "- **Event Detail / Audit**: source URLs, SHA-backed artifacts, and consistency checks.",
+            "",
+            "## Important limits",
+            "",
+            f"- Relationship rows still marked needs review: `{int(label_counts.get('recipient_match_needs_review', 0) or 0)}`",
+            f"- True parse failures still present in the source slice: `{int(source_quality.get('parse_failure_count', 0) or 0)}`",
+            *[f"- {item}" for item in caveats[:4]],
+        ]
+    )
def _data_used_markdown(manifest: Dict[str, Any]) -> str:
    """Build the "What data is used here" markdown section.

    Args:
        manifest: Release manifest. Reads
            ``methodology_summary["source_groups"]`` and passes it through
            ``_split_source_group_lines`` to obtain the bullet labels.

    Returns:
        Markdown text joined with newlines: an intro, one bullet per source
        group, then a fixed description of the published files.
    """
    methodology = manifest.get("methodology_summary") or {}
    group_labels = _split_source_group_lines(methodology.get("source_groups"))

    intro = [
        "## What data is used here",
        "",
        "This release uses public records from these source groups:",
        "",
    ]
    group_bullets = [f"- {label}" for label in group_labels]
    file_notes = [
        "",
        "## How those records show up in this release",
        "",
        "- `members.csv`: one row per House member in this slice.",
        "- `scored_events.csv`: row-level overlaps or signals that survived into the public release.",
        "- `graph_links.csv`: relationship-level rows aggregated from the event layer.",
        "- `evidence_audit/*`: source URLs, SHA-256 values, and public-safe provenance rows for verification.",
        "",
        "Not every internal raw record is published here. The public package is a bounded, sanitized release layer.",
    ]
    return "\n".join(intro + group_bullets + file_notes)
+def _how_to_use_markdown() -> str:
+    return "\n".join(
+        [
+            "## How to read this",
+            "",
+            "1. Pick one House member.",
+            "2. Start in **Overview** and look at the top sectors or funding recipients.",
+            "3. Click a relationship in **Relationship to explain**.",
+            "4. Read the evidence breakdown and the coarse evidence window.",
+            "5. Use the source URLs and SHA-backed artifacts if you want to verify it yourself.",
+            "",
+            "The safest way to interpret this release is as a map of documented public-record relationships, not a verdict.",
+        ]
+    )
 def _plain_status_label(value: str) -> str:
     normalized = str(value or "").strip()
     mapping = {
 def build_app(copy_path: str | Path):
     data = load_release_data(copy_path)
+    manifest = data["manifest"]
     events = data["events"]
     nodes = data["graph_nodes"]
     edges = data["graph_edges"]
     with gr.Blocks(title=copy_payload.get("title", "Congress Public Records Slice")) as app:
         gr.Markdown(copy_payload.get("welcome_markdown", copy_payload.get("landing_markdown", "")))
+        with gr.Tab("What This Is"):
+            gr.Markdown(_about_release_markdown(manifest, data["recipient_link_quality"], data["source_quality"]))
+            gr.Markdown(_data_used_markdown(manifest))
+            gr.Markdown(_how_to_use_markdown())
         with gr.Tab("Overview"):
             gr.Markdown(
                 "### Start here\n\n"
             gr.Dataframe(value=data["artifact_index"].head(200), interactive=False)
         with gr.Tab("Methodology & Limits"):
             gr.Markdown(copy_payload.get("landing_markdown", ""))
+            gr.Markdown(_data_used_markdown(manifest))
             gr.Markdown(copy_payload.get("downloads_markdown", ""))
         with gr.Tab("Downloads"):
             gr.Markdown(copy_payload.get("downloads_markdown", ""))