cjc0013 committed on
Commit
fbba473
·
verified ·
1 Parent(s): 9f022b5

Add reporter handoff CSV alongside deterministic raw export

Browse files
Files changed (1) hide show
  1. public_space_app.py +80 -14
public_space_app.py CHANGED
@@ -821,9 +821,12 @@ def _graph_view_summary_markdown(
821
 
822
  def _plain_reason_code(value: str) -> str:
823
  normalized = str(value or "").strip()
 
824
  mapping = {
825
  "recipient_exact_match": "Exact recipient match",
826
  "issuer_match": "Issuer or company match",
 
 
827
  "legislative_relevance_match": "Legislative topic match",
828
  "major_vote_overlap": "Vote activity overlaps the same topic window",
829
  "lobbying_issue_overlap": "Lobbying activity overlaps the same topic window",
@@ -832,7 +835,7 @@ def _plain_reason_code(value: str) -> str:
832
  "lobbying_density_support": "Many related lobbying filings in the same area",
833
  "insufficient_official_support": "Not enough official support for a stronger label",
834
  }
835
- return mapping.get(normalized, normalized.replace("_", " ").title() or "Signal")
836
 
837
 
838
  def _plain_strengthener(value: str) -> str:
@@ -1339,15 +1342,63 @@ def _relationship_export_rows(edges: pd.DataFrame, relationship_id: str, ranking
1339
  return export_rows
1340
 
1341
 
1342
- def _write_relationship_export_bundle(edges: pd.DataFrame, relationship_id: str, ranking_mode: str) -> tuple[str, str | None, str | None]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1343
  export_rows = _relationship_export_rows(edges, relationship_id, ranking_mode)
1344
  if not export_rows:
1345
- return "Pick one relationship to generate exportable evidence files.", None, None
1346
  relationship_id_value = str(export_rows[0]["relationship_id"] or relationship_id)
1347
  export_dir = Path(tempfile.gettempdir()) / "cmp_space_exports"
1348
  export_dir.mkdir(parents=True, exist_ok=True)
1349
  stem = _safe_export_stem(f"{relationship_id_value}-{ranking_mode}")
1350
- csv_path = export_dir / f"{stem}.csv"
 
1351
  pdf_path = export_dir / f"{stem}.pdf"
1352
 
1353
  fieldnames = [
@@ -1370,6 +1421,14 @@ def _write_relationship_export_bundle(edges: pd.DataFrame, relationship_id: str,
1370
  for export_row in export_rows:
1371
  writer.writerow({name: export_row.get(name, "") for name in fieldnames})
1372
 
 
 
 
 
 
 
 
 
1373
  title = f"{export_rows[0]['member_name']} -> {export_rows[0]['target_label']}"
1374
  pdf = canvas.Canvas(str(pdf_path), pagesize=LETTER, invariant=1)
1375
  width, height = LETTER
@@ -1404,9 +1463,9 @@ def _write_relationship_export_bundle(edges: pd.DataFrame, relationship_id: str,
1404
  pdf.save()
1405
  note = (
1406
  f"Prepared deterministic export files for `{relationship_id_value}`. "
1407
- "The CSV keeps one row per exported evidence item, and the PDF mirrors the same content in a fixed order."
1408
  )
1409
- return note, str(csv_path), str(pdf_path)
1410
 
1411
 
1412
  def _timeline_window_from_url(url: str) -> tuple[int, str, str]:
@@ -1828,7 +1887,9 @@ def build_app(copy_path: str | Path):
1828
  options = _relationship_options(ranked)
1829
  valid_ids = {value for _, value in options}
1830
  selected = relationship_id if relationship_id in valid_ids else (options[0][1] if options else None)
1831
- export_note, export_csv, export_pdf = _write_relationship_export_bundle(filtered_edges, selected or "", ranking_mode)
 
 
1832
  return (
1833
  _overview_summary_markdown(
1834
  ranked,
@@ -1851,6 +1912,7 @@ def build_app(copy_path: str | Path):
1851
  _relationship_timeline_html(filtered_edges, selected or ""),
1852
  export_note,
1853
  export_csv,
 
1854
  export_pdf,
1855
  )
1856
 
@@ -1863,12 +1925,15 @@ def build_app(copy_path: str | Path):
1863
  relationship_id: str,
1864
  ):
1865
  filtered_edges = _overview_edges(member_query, family, only_strong, int(top_n))
1866
- export_note, export_csv, export_pdf = _write_relationship_export_bundle(filtered_edges, relationship_id, ranking_mode)
 
 
1867
  return (
1868
  _relationship_detail_markdown(filtered_edges, relationship_id, ranking_mode),
1869
  _relationship_timeline_html(filtered_edges, relationship_id),
1870
  export_note,
1871
  export_csv,
 
1872
  export_pdf,
1873
  )
1874
 
@@ -2007,29 +2072,30 @@ def build_app(copy_path: str | Path):
2007
  overview_timeline_html = gr.HTML()
2008
  export_note_md = gr.Markdown()
2009
  with gr.Row():
2010
- export_csv_file = gr.File(label="Evidence breakdown CSV", interactive=False)
 
2011
  export_pdf_file = gr.File(label="Evidence breakdown PDF", interactive=False)
2012
 
2013
  search_button.click(
2014
  _update_overview,
2015
  [overview_member, overview_family, overview_only_strong, overview_top_n, overview_ranking_mode, relationship_choice],
2016
- [overview_summary_md, overview_cards, relationship_choice, overview_detail_md, overview_timeline_html, export_note_md, export_csv_file, export_pdf_file],
2017
  )
2018
  overview_member.submit(
2019
  _update_overview,
2020
  [overview_member, overview_family, overview_only_strong, overview_top_n, overview_ranking_mode, relationship_choice],
2021
- [overview_summary_md, overview_cards, relationship_choice, overview_detail_md, overview_timeline_html, export_note_md, export_csv_file, export_pdf_file],
2022
  )
2023
  for control in (overview_family, overview_ranking_mode, overview_only_strong, overview_top_n):
2024
  control.change(
2025
  _update_overview,
2026
  [overview_member, overview_family, overview_only_strong, overview_top_n, overview_ranking_mode, relationship_choice],
2027
- [overview_summary_md, overview_cards, relationship_choice, overview_detail_md, overview_timeline_html, export_note_md, export_csv_file, export_pdf_file],
2028
  )
2029
  relationship_choice.change(
2030
  _update_overview_detail,
2031
  [overview_member, overview_family, overview_only_strong, overview_top_n, overview_ranking_mode, relationship_choice],
2032
- [overview_detail_md, overview_timeline_html, export_note_md, export_csv_file, export_pdf_file],
2033
  )
2034
 
2035
  with gr.Accordion("Explore the network map (optional)", open=False):
@@ -2127,7 +2193,7 @@ def build_app(copy_path: str | Path):
2127
  app.load(
2128
  _update_overview,
2129
  [overview_member, overview_family, overview_only_strong, overview_top_n, overview_ranking_mode, relationship_choice],
2130
- [overview_summary_md, overview_cards, relationship_choice, overview_detail_md, overview_timeline_html, export_note_md, export_csv_file, export_pdf_file],
2131
  )
2132
  app.load(
2133
  _update_graph,
 
821
 
822
  def _plain_reason_code(value: str) -> str:
823
  normalized = str(value or "").strip()
824
+ normalized_key = normalized.lower().replace(" ", "_")
825
  mapping = {
826
  "recipient_exact_match": "Exact recipient match",
827
  "issuer_match": "Issuer or company match",
828
+ "bill_sponsor_overlap": "Bill sponsorship overlaps the same topic window",
829
+ "committee_jurisdiction_match": "Committee jurisdiction overlaps the same topic area",
830
  "legislative_relevance_match": "Legislative topic match",
831
  "major_vote_overlap": "Vote activity overlaps the same topic window",
832
  "lobbying_issue_overlap": "Lobbying activity overlaps the same topic window",
 
835
  "lobbying_density_support": "Many related lobbying filings in the same area",
836
  "insufficient_official_support": "Not enough official support for a stronger label",
837
  }
838
+ return mapping.get(normalized_key, normalized.replace("_", " ").title() or "Signal")
839
 
840
 
841
  def _plain_strengthener(value: str) -> str:
 
1342
  return export_rows
1343
 
1344
 
1345
def _relationship_handoff_rows(edges: pd.DataFrame, relationship_id: str, ranking_mode: str) -> list[dict[str, Any]]:
    """Build the reporter-handoff rows for a single relationship.

    Each returned row is a dict with the keys ``section``, ``label``,
    ``explanation`` and ``source_url``. The "Summary" rows come first in a
    fixed order, followed by the evidence signals, the reasons the link
    appears, the items that would strengthen it, and the published source
    URLs -- each of those groups sorted.

    Returns an empty list when no edge row is selected for ``relationship_id``.
    """
    edge = _select_edge_row(edges, relationship_id)
    if not edge:
        return []

    family = str(edge.get("relationship_family", "") or "")
    raw_score = _relationship_score(edge)
    relative_score = _relative_relationship_score(edge, _member_activity_baselines(edges))

    # The displayed score follows the ranking mode; anything other than
    # "relative" (including an empty mode) shows the raw score.
    mode = str(ranking_mode or "raw").strip().lower()
    if mode == "relative":
        shown_score = relative_score
    else:
        shown_score = raw_score

    def count_of(key: str) -> int:
        # Missing or falsy counters are reported as 0.
        return int(edge.get(key, 0) or 0)

    def entry(section: str, label: str, explanation: str, source_url: str = "") -> dict[str, Any]:
        return {
            "section": section,
            "label": label,
            "explanation": explanation,
            "source_url": source_url,
        }

    # Recipient relationships report review rows as the caution count;
    # every other family reports weak events.
    caution_key = "review_count" if family == "recipient" else "weak_event_count"

    summary = [
        ("Member", str(edge.get("member_name") or edge.get("member_slug") or "")),
        ("Target", str(edge.get("target_label") or "")),
        ("Relationship view", _plain_family_label(family)),
        ("Strength label", _plain_status_label(str(edge.get("relationship_status", "") or ""))),
        ("Displayed score", str(shown_score)),
        ("Raw score", str(raw_score)),
        ("Relative-to-baseline score (experimental)", str(relative_score)),
        ("Supporting relationship rows", str(count_of("link_count"))),
        ("Stronger-support rows", str(_stronger_support_count(edge))),
        ("Caution / weaker rows", str(count_of(caution_key))),
        ("Integrity-checked source records attached", str(count_of("sha_backed_source_artifact_count"))),
        ("Unresolved source refs still counted", str(count_of("unresolved_source_ref_count"))),
        ("Evidence window", _window_overlap_text(edge)),
    ]
    rows: list[dict[str, Any]] = [entry("Summary", label, text) for label, text in summary]

    rows.extend(
        entry("Evidence signals", chip.title(), _evidence_chip_help(chip))
        for chip in sorted(_edge_evidence_chips(edge))
    )

    reasons = sorted(
        [_plain_reason_code(code) for code in _split_pipe_values(edge.get("reason_codes", ""), limit=8)]
    )
    rows.extend(entry("Why this link appears", reason, reason) for reason in reasons)

    strengtheners = sorted(
        [_plain_strengthener(raw) for raw in _split_pipe_values(edge.get("missing_to_strengthen", ""), limit=12)]
    )
    rows.extend(
        entry("What would strengthen it", "Needs stronger support", text)
        for text in strengtheners
    )

    for link in sorted(_split_pipe_values(edge.get("source_urls", ""), limit=20)):
        # Label each URL by its host; fall back to a generic label when the
        # URL has no network location.
        host = urlparse(link).netloc or "Published source URL"
        rows.append(entry("Published source URLs", host, "Open this published record directly.", link))

    return rows
1388
+
1389
+
1390
+ def _write_relationship_export_bundle(
1391
+ edges: pd.DataFrame, relationship_id: str, ranking_mode: str
1392
+ ) -> tuple[str, str | None, str | None, str | None]:
1393
  export_rows = _relationship_export_rows(edges, relationship_id, ranking_mode)
1394
  if not export_rows:
1395
+ return "Pick one relationship to generate exportable evidence files.", None, None, None
1396
  relationship_id_value = str(export_rows[0]["relationship_id"] or relationship_id)
1397
  export_dir = Path(tempfile.gettempdir()) / "cmp_space_exports"
1398
  export_dir.mkdir(parents=True, exist_ok=True)
1399
  stem = _safe_export_stem(f"{relationship_id_value}-{ranking_mode}")
1400
+ csv_path = export_dir / f"{stem}-raw.csv"
1401
+ handoff_csv_path = export_dir / f"{stem}-handoff.csv"
1402
  pdf_path = export_dir / f"{stem}.pdf"
1403
 
1404
  fieldnames = [
 
1421
  for export_row in export_rows:
1422
  writer.writerow({name: export_row.get(name, "") for name in fieldnames})
1423
 
1424
+ handoff_rows = _relationship_handoff_rows(edges, relationship_id, ranking_mode)
1425
+ handoff_fieldnames = ["section", "label", "explanation", "source_url"]
1426
+ with handoff_csv_path.open("w", encoding="utf-8", newline="") as handle:
1427
+ writer = csv.DictWriter(handle, fieldnames=handoff_fieldnames)
1428
+ writer.writeheader()
1429
+ for export_row in handoff_rows:
1430
+ writer.writerow({name: export_row.get(name, "") for name in handoff_fieldnames})
1431
+
1432
  title = f"{export_rows[0]['member_name']} -> {export_rows[0]['target_label']}"
1433
  pdf = canvas.Canvas(str(pdf_path), pagesize=LETTER, invariant=1)
1434
  width, height = LETTER
 
1463
  pdf.save()
1464
  note = (
1465
  f"Prepared deterministic export files for `{relationship_id_value}`. "
1466
+ "Use the raw CSV for machine-style row exports, the handoff CSV for a cleaner reporter view, and the PDF for a fixed printable summary."
1467
  )
1468
+ return note, str(csv_path), str(handoff_csv_path), str(pdf_path)
1469
 
1470
 
1471
  def _timeline_window_from_url(url: str) -> tuple[int, str, str]:
 
1887
  options = _relationship_options(ranked)
1888
  valid_ids = {value for _, value in options}
1889
  selected = relationship_id if relationship_id in valid_ids else (options[0][1] if options else None)
1890
+ export_note, export_csv, export_handoff_csv, export_pdf = _write_relationship_export_bundle(
1891
+ filtered_edges, selected or "", ranking_mode
1892
+ )
1893
  return (
1894
  _overview_summary_markdown(
1895
  ranked,
 
1912
  _relationship_timeline_html(filtered_edges, selected or ""),
1913
  export_note,
1914
  export_csv,
1915
+ export_handoff_csv,
1916
  export_pdf,
1917
  )
1918
 
 
1925
  relationship_id: str,
1926
  ):
1927
  filtered_edges = _overview_edges(member_query, family, only_strong, int(top_n))
1928
+ export_note, export_csv, export_handoff_csv, export_pdf = _write_relationship_export_bundle(
1929
+ filtered_edges, relationship_id, ranking_mode
1930
+ )
1931
  return (
1932
  _relationship_detail_markdown(filtered_edges, relationship_id, ranking_mode),
1933
  _relationship_timeline_html(filtered_edges, relationship_id),
1934
  export_note,
1935
  export_csv,
1936
+ export_handoff_csv,
1937
  export_pdf,
1938
  )
1939
 
 
2072
  overview_timeline_html = gr.HTML()
2073
  export_note_md = gr.Markdown()
2074
  with gr.Row():
2075
+ export_csv_file = gr.File(label="Raw evidence CSV", interactive=False)
2076
+ export_handoff_csv_file = gr.File(label="Reporter handoff CSV", interactive=False)
2077
  export_pdf_file = gr.File(label="Evidence breakdown PDF", interactive=False)
2078
 
2079
  search_button.click(
2080
  _update_overview,
2081
  [overview_member, overview_family, overview_only_strong, overview_top_n, overview_ranking_mode, relationship_choice],
2082
+ [overview_summary_md, overview_cards, relationship_choice, overview_detail_md, overview_timeline_html, export_note_md, export_csv_file, export_handoff_csv_file, export_pdf_file],
2083
  )
2084
  overview_member.submit(
2085
  _update_overview,
2086
  [overview_member, overview_family, overview_only_strong, overview_top_n, overview_ranking_mode, relationship_choice],
2087
+ [overview_summary_md, overview_cards, relationship_choice, overview_detail_md, overview_timeline_html, export_note_md, export_csv_file, export_handoff_csv_file, export_pdf_file],
2088
  )
2089
  for control in (overview_family, overview_ranking_mode, overview_only_strong, overview_top_n):
2090
  control.change(
2091
  _update_overview,
2092
  [overview_member, overview_family, overview_only_strong, overview_top_n, overview_ranking_mode, relationship_choice],
2093
+ [overview_summary_md, overview_cards, relationship_choice, overview_detail_md, overview_timeline_html, export_note_md, export_csv_file, export_handoff_csv_file, export_pdf_file],
2094
  )
2095
  relationship_choice.change(
2096
  _update_overview_detail,
2097
  [overview_member, overview_family, overview_only_strong, overview_top_n, overview_ranking_mode, relationship_choice],
2098
+ [overview_detail_md, overview_timeline_html, export_note_md, export_csv_file, export_handoff_csv_file, export_pdf_file],
2099
  )
2100
 
2101
  with gr.Accordion("Explore the network map (optional)", open=False):
 
2193
  app.load(
2194
  _update_overview,
2195
  [overview_member, overview_family, overview_only_strong, overview_top_n, overview_ranking_mode, relationship_choice],
2196
+ [overview_summary_md, overview_cards, relationship_choice, overview_detail_md, overview_timeline_html, export_note_md, export_csv_file, export_handoff_csv_file, export_pdf_file],
2197
  )
2198
  app.load(
2199
  _update_graph,