cjc0013 committed on
Commit
86f2170
·
verified ·
1 Parent(s): d019a92

Shift Space to overview-first ranked relationships UX

Browse files
__pycache__/public_space_app.cpython-311.pyc CHANGED
Binary files a/__pycache__/public_space_app.cpython-311.pyc and b/__pycache__/public_space_app.cpython-311.pyc differ
 
public_copy.json CHANGED
@@ -4,7 +4,7 @@
4
  "subtitle": "Neutral Records explorer for a public-record slice of congressional money-and-power linkages.",
5
  "dataset_repo_id": "cjc0013/cmp-data",
6
  "space_repo_id": "cjc0013/cmp",
7
- "welcome_markdown": "# Congress Public Records Slice\n\nStart with the **Network Graph** tab.\n\n- Search one House member name first.\n- Green dots are House members, rust dots are funding recipients, and gold dots are sectors.\n- Lines show public-record support in this released slice; thicker lines mean more supporting rows.\n- Use **Explore** and **Event Detail** to inspect the underlying rows, source URLs, and SHA-backed artifacts.\n\nThis is an exploration tool, not an accusation tool.",
8
  "landing_markdown": "# Congress Public Records Slice\n\nA neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.\n\n- This release is a slice of public-record data, not a complete accounting of all potentially relevant data.\n- Future releases may update or expand this slice as source recovery, parsing, and evidence linkage improve.\n- This release does not assign guilt, wrongdoing, intent, or causality to any person or organization.\n- The release shows public-record overlaps, timing, and linkage strength, not proof of illegality or corruption.\n- Some rows remain review-tier or include unresolved official source references and should be read with those labels in mind.\n- The public package includes verification summaries and SHA-backed artifact indexes, but it does not include the full internal raw corpus, so external verification is bounded by what is published here.",
9
  "downloads_markdown": "## Downloads\n\n- Dataset repo id: `cjc0013/cmp-data`\n- Space repo id: `cjc0013/cmp`\n\nUse the dataset bundle files for direct review, CSV download, and SHA-backed source checks.",
10
  "dataset_bundle_prefix": "dataset_bundle"
 
4
  "subtitle": "Neutral Records explorer for a public-record slice of congressional money-and-power linkages.",
5
  "dataset_repo_id": "cjc0013/cmp-data",
6
  "space_repo_id": "cjc0013/cmp",
7
+ "welcome_markdown": "# Congress Public Records Slice\n\nStart with **Overview** for the clearest read.\n\n- Pick one House member first.\n- Use **Overview** to see the strongest sectors or funding recipients for that member.\n- Use **Explain Link** to see why one relationship appears in this released slice.\n- Use **Explore Graph** only if you want a secondary visual map.\n\nThis is an exploration tool, not an accusation tool.",
8
  "landing_markdown": "# Congress Public Records Slice\n\nA neutral, review-oriented slice of House public-record linkages across financial disclosures, sector overlap, and community project funding recipient relationships.\n\n- This release is a slice of public-record data, not a complete accounting of all potentially relevant data.\n- Future releases may update or expand this slice as source recovery, parsing, and evidence linkage improve.\n- This release does not assign guilt, wrongdoing, intent, or causality to any person or organization.\n- The release shows public-record overlaps, timing, and linkage strength, not proof of illegality or corruption.\n- Some rows remain review-tier or include unresolved official source references and should be read with those labels in mind.\n- The public package includes verification summaries and SHA-backed artifact indexes, but it does not include the full internal raw corpus, so external verification is bounded by what is published here.",
9
  "downloads_markdown": "## Downloads\n\n- Dataset repo id: `cjc0013/cmp-data`\n- Space repo id: `cjc0013/cmp`\n\nUse the dataset bundle files for direct review, CSV download, and SHA-backed source checks.",
10
  "dataset_bundle_prefix": "dataset_bundle"
public_space_app.py CHANGED
@@ -3,9 +3,11 @@ from __future__ import annotations
3
  import html
4
  import json
5
  import os
 
6
  import urllib.request
7
  from pathlib import Path
8
  from typing import Any, Dict, Tuple
 
9
 
10
  import pandas as pd
11
 
@@ -166,8 +168,9 @@ def _graph_intro_markdown(config: Dict[str, Any]) -> str:
166
  )
167
  return "\n".join(
168
  [
169
- "### What you are looking at",
170
  "",
 
171
  "- Green dots are House members, rust dots are funding recipients, and gold dots are sectors.",
172
  "- Thicker lines mean more supporting relationship rows in this released slice.",
173
  opening_line,
@@ -228,12 +231,105 @@ def _graph_view_summary_markdown(
228
  return "\n".join(lines)
229
 
230
 
231
- def _graph_table(edges: pd.DataFrame) -> pd.DataFrame:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
232
  if edges.empty:
233
- return pd.DataFrame(columns=["member", "target", "relationship_view", "strength", "supporting_rows"])
234
  rows: list[dict[str, Any]] = []
235
  for row in edges.to_dict("records"):
236
- status = str(row.get("relationship_status", "") or "")
237
  family = str(row.get("relationship_family", "") or "")
238
  stronger_support = int(
239
  row.get("linked_count", 0) or 0
@@ -245,21 +341,189 @@ def _graph_table(edges: pd.DataFrame) -> pd.DataFrame:
245
  if family == "recipient"
246
  else row.get("weak_event_count", 0) or 0
247
  )
248
- source_examples = ", ".join(_split_pipe_values(row.get("source_urls", ""), limit=2))
249
  rows.append(
250
  {
 
251
  "member": str(row.get("member_name") or row.get("member_slug") or ""),
252
- "target": str(row.get("target_label") or ""),
253
- "relationship_view": _plain_family_label(family),
254
- "strength": _plain_status_label(status),
255
- "supporting_rows": int(row.get("link_count", 0) or 0),
256
- "stronger_support": stronger_support,
257
- "caution_support": caution_support,
258
- "unresolved_refs": int(row.get("unresolved_source_ref_count", 0) or 0),
259
- "source_examples": source_examples,
 
 
260
  }
261
  )
262
- return pd.DataFrame(rows)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
263
 
264
 
265
  def _filter_events(events: pd.DataFrame, member_query: str, event_type: str, score_label: str, text_query: str) -> pd.DataFrame:
@@ -515,7 +779,6 @@ def _event_detail(events: pd.DataFrame, provenance: pd.DataFrame, event_id: str)
515
  def build_app(copy_path: str | Path):
516
  data = load_release_data(copy_path)
517
  events = data["events"]
518
- links = data["links"]
519
  nodes = data["graph_nodes"]
520
  edges = data["graph_edges"]
521
  provenance = data["event_provenance"]
@@ -543,25 +806,97 @@ def build_app(copy_path: str | Path):
543
  event_id_choices = sorted(events["event_id"].dropna().unique().tolist())
544
  graph_defaults = data["graph_config"].get("default_filters") or {}
545
  overview_member_limit = int(graph_defaults.get("overview_member_limit", 8))
 
546
 
547
  with gr.Blocks(title=copy_payload.get("title", "Congress Public Records Slice")) as app:
548
  gr.Markdown(copy_payload.get("welcome_markdown", copy_payload.get("landing_markdown", "")))
549
- with gr.Tab("Network Graph"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
550
  gr.Markdown(_graph_intro_markdown(data["graph_config"]))
551
  with gr.Row():
552
- family = gr.Dropdown(label="Relationship view", choices=graph_family_choices, value=str(graph_defaults.get("relationship_family", "sector")))
553
- member_graph_query = gr.Textbox(label="House member to focus", value=str(graph_defaults.get("default_member_search", "")))
554
  target_query = gr.Textbox(label="Recipient or sector search")
555
  graph_score = gr.Dropdown(label="Score label", choices=graph_score_choices, value="all")
556
- review_status = gr.Dropdown(label="Relationship strength", choices=graph_status_choices, value=str(graph_defaults.get("review_status", "stronger")))
557
  if example_member_choices:
558
  gr.Examples(examples=example_member_choices, inputs=[member_graph_query], label="Try one of these example members")
559
  with gr.Row():
560
- hide_unresolved_only = gr.Checkbox(label="Hide unresolved relationships", value=bool(graph_defaults.get("hide_unresolved_only", True)))
561
- max_edges = gr.Slider(label="Max visible relationships", minimum=25, maximum=300, step=25, value=int(graph_defaults.get("max_edges", 60)))
562
  graph_summary_md = gr.Markdown()
563
  graph_html = gr.HTML()
564
- gr.Markdown("#### Relationships in this view")
565
  graph_df = gr.Dataframe(interactive=False)
566
  def _update_graph(family: str, member_graph_query: str, target_query: str, graph_score: str, review_status: str, hide_unresolved_only: bool, max_edges: int):
567
  filtered_edges = _filter_graph(edges, family, member_graph_query, target_query, graph_score, review_status, hide_unresolved_only, max_edges, overview_member_limit)
@@ -578,7 +913,7 @@ def build_app(copy_path: str | Path):
578
  for control in (family, member_graph_query, target_query, graph_score, review_status, hide_unresolved_only, max_edges):
579
  control.change(_update_graph, [family, member_graph_query, target_query, graph_score, review_status, hide_unresolved_only, max_edges], [graph_summary_md, graph_html, graph_df])
580
  app.load(_update_graph, [family, member_graph_query, target_query, graph_score, review_status, hide_unresolved_only, max_edges], [graph_summary_md, graph_html, graph_df])
581
- with gr.Tab("Explore"):
582
  with gr.Row():
583
  member_query = gr.Textbox(label="Member name or slug")
584
  event_type = gr.Dropdown(label="Event type", choices=event_type_choices, value="all")
 
3
  import html
4
  import json
5
  import os
6
+ import re
7
  import urllib.request
8
  from pathlib import Path
9
  from typing import Any, Dict, Tuple
10
+ from urllib.parse import urlparse
11
 
12
  import pandas as pd
13
 
 
168
  )
169
  return "\n".join(
170
  [
171
+ "### Optional graph view",
172
  "",
173
+ "- Use this only after the overview if you want a visual map.",
174
  "- Green dots are House members, rust dots are funding recipients, and gold dots are sectors.",
175
  "- Thicker lines mean more supporting relationship rows in this released slice.",
176
  opening_line,
 
231
  return "\n".join(lines)
232
 
233
 
234
+ def _plain_reason_code(value: str) -> str:
235
+ normalized = str(value or "").strip()
236
+ mapping = {
237
+ "recipient_exact_match": "Exact recipient match",
238
+ "issuer_match": "Issuer or company match",
239
+ "legislative_relevance_match": "Legislative topic match",
240
+ "major_vote_overlap": "Vote activity overlaps the same topic window",
241
+ "lobbying_issue_overlap": "Lobbying activity overlaps the same topic window",
242
+ "legislative_density_support": "Many related bill records in the same area",
243
+ "vote_density_support": "Many related vote records in the same area",
244
+ "lobbying_density_support": "Many related lobbying filings in the same area",
245
+ "insufficient_official_support": "Not enough official support for a stronger label",
246
+ }
247
+ return mapping.get(normalized, normalized.replace("_", " ").title() or "Signal")
248
+
249
+
250
def _edge_evidence_chips(row: Dict[str, Any]) -> list[str]:
    """Return up to six short evidence-type labels for one relationship row.

    Labels come from three places, in this fixed order: substrings found in
    the row's ``source_urls``, membership of specific values in the row's
    ``reason_codes``, and a positive ``profile_link_count``.

    NOTE(review): ``_split_pipe_values`` is defined elsewhere in the file; it
    presumably splits a pipe-delimited string into a list capped at ``limit``.
    """
    source_urls = _split_pipe_values(row.get("source_urls", ""), limit=12)
    codes = set(_split_pipe_values(row.get("reason_codes", ""), limit=20))

    # (URL substring, label) pairs, checked in display order.
    url_rules = (
        ("/ptr-pdfs/", "trade disclosure"),
        ("/financial-pdfs/", "annual disclosure"),
        ("govinfo.gov/bulkdata/BILLSTATUS", "bill record"),
        ("usaspending.gov/award/", "funding award"),
        ("committee_info", "committee roster"),
    )
    # (triggering reason codes, label) pairs, checked in display order.
    code_rules = (
        ({"major_vote_overlap", "vote_density_support"}, "vote activity"),
        ({"lobbying_issue_overlap", "lobbying_density_support"}, "lobbying activity"),
    )

    labels = [
        label
        for marker, label in url_rules
        if any(marker in candidate for candidate in source_urls)
    ]
    labels.extend(label for triggers, label in code_rules if codes & triggers)
    if int(row.get("profile_link_count", 0) or 0) > 0:
        labels.append("member profile")

    # Order-preserving dedupe, then cap the number of chips shown.
    return list(dict.fromkeys(labels))[:6]
275
+
276
+
277
def _window_overlap_text(row: Dict[str, Any]) -> str:
    """Describe, in a few words, the timing-overlap support for a row.

    The first matching case wins: distinct reason codes containing the text
    ``"overlap"``, then a positive ``profile_link_count``, then a positive
    ``unresolved_source_ref_count``, otherwise a generic fallback phrase.
    """
    distinct_codes = set(_split_pipe_values(row.get("reason_codes", ""), limit=20))
    overlap_count = sum(1 for code in distinct_codes if "overlap" in code)
    if overlap_count:
        plural = "" if overlap_count == 1 else "s"
        return f"yes ({overlap_count} overlap signal{plural})"

    has_profile_support = int(row.get("profile_link_count", 0) or 0) > 0
    if has_profile_support:
        return "profile support only"

    has_unresolved_refs = int(row.get("unresolved_source_ref_count", 0) or 0) > 0
    if has_unresolved_refs:
        return "some timing still unresolved"

    return "not explicit in this row"
288
+
289
+
290
def _relationship_score(row: Dict[str, Any]) -> int:
    """Compute a 0-100 display score for one relationship row.

    The score is a status-dependent base plus capped bonuses for supporting
    rows, stronger-support rows and evidence chips, minus a capped penalty for
    unresolved source references. The total is clamped to ``[0, 100]``.
    """
    status = str(row.get("relationship_status", "") or "")
    family = str(row.get("relationship_family", "") or "")

    # Recipient rows count "linked" rows as stronger support; every other
    # family uses the strong-event count.
    if family == "recipient":
        stronger_support = int(row.get("linked_count", 0) or 0)
    else:
        stronger_support = int(row.get("strong_event_count", 0) or 0)

    base_by_status = {
        "linked": 78,
        "release_ok": 74,
        "acceptable_with_label": 56,
        "needs_review": 44,
        "unresolved": 20,
    }
    components = (
        base_by_status.get(status, 30),  # unknown statuses get a neutral base
        min(int(row.get("link_count", 0) or 0) * 3, 15),
        min(stronger_support * 4, 18),
        min(len(_edge_evidence_chips(row)) * 2, 10),
        -min(int(row.get("unresolved_source_ref_count", 0) or 0), 12),
    )
    total = sum(components)
    return min(100, max(0, total))
311
+
312
+
313
+ def _rank_relationships(edges: pd.DataFrame) -> pd.DataFrame:
314
+ columns = [
315
+ "rank",
316
+ "relationship_id",
317
+ "member",
318
+ "counterparty / sector",
319
+ "overall score",
320
+ "strength",
321
+ "evidence",
322
+ "time-window overlap",
323
+ "supporting rows",
324
+ "stronger support",
325
+ "needs caution",
326
+ "unresolved refs",
327
+ "source_examples",
328
+ ]
329
  if edges.empty:
330
+ return pd.DataFrame(columns=columns)
331
  rows: list[dict[str, Any]] = []
332
  for row in edges.to_dict("records"):
 
333
  family = str(row.get("relationship_family", "") or "")
334
  stronger_support = int(
335
  row.get("linked_count", 0) or 0
 
341
  if family == "recipient"
342
  else row.get("weak_event_count", 0) or 0
343
  )
344
+ chips = _edge_evidence_chips(row)
345
  rows.append(
346
  {
347
+ "relationship_id": str(row.get("edge_id") or ""),
348
  "member": str(row.get("member_name") or row.get("member_slug") or ""),
349
+ "counterparty / sector": str(row.get("target_label") or ""),
350
+ "overall score": _relationship_score(row),
351
+ "strength": _plain_status_label(str(row.get("relationship_status", "") or "")),
352
+ "evidence": " | ".join(chips) if chips else "published source support",
353
+ "time-window overlap": _window_overlap_text(row),
354
+ "supporting rows": int(row.get("link_count", 0) or 0),
355
+ "stronger support": stronger_support,
356
+ "needs caution": caution_support,
357
+ "unresolved refs": int(row.get("unresolved_source_ref_count", 0) or 0),
358
+ "source_examples": ", ".join(_split_pipe_values(row.get("source_urls", ""), limit=2)),
359
  }
360
  )
361
+ ranked = pd.DataFrame(rows).sort_values(
362
+ ["overall score", "supporting rows", "stronger support", "counterparty / sector"],
363
+ ascending=[False, False, False, True],
364
+ ).reset_index(drop=True)
365
+ ranked.insert(0, "rank", range(1, len(ranked) + 1))
366
+ return ranked
367
+
368
+
369
+ def _overview_summary_markdown(
370
+ ranked: pd.DataFrame,
371
+ *,
372
+ member_query: str,
373
+ family: str,
374
+ only_strong_links: bool,
375
+ top_n: int,
376
+ ) -> str:
377
+ if ranked.empty:
378
+ return "\n".join(
379
+ [
380
+ "### Overview",
381
+ "",
382
+ "No relationships match the current filters.",
383
+ "",
384
+ "Try a different House member, switch from sectors to funding recipients, or turn off the strong-links-only filter.",
385
+ ]
386
+ )
387
+ focus_names = [str(value) for value in ranked["member"].dropna().unique().tolist() if str(value).strip()]
388
+ focus_label = ", ".join(focus_names[:3])
389
+ lines = [
390
+ "### Overview",
391
+ "",
392
+ f"- Showing the top `{min(int(top_n), len(ranked))}` `{_plain_family_label(family).lower()}` for `{focus_label}`.",
393
+ f"- Filtered to stronger links only: `{str(bool(only_strong_links)).lower()}`.",
394
+ f"- Highest score in this view: `{int(ranked['overall score'].max())}`.",
395
+ "- Pick one relationship below to see the evidence breakdown and coarse evidence window.",
396
+ ]
397
+ if not str(member_query or "").strip():
398
+ lines.append("- Tip: search one House member for the clearest first read.")
399
+ return "\n".join(lines)
400
+
401
+
402
+ def _relationship_options(ranked: pd.DataFrame) -> list[tuple[str, str]]:
403
+ if ranked.empty:
404
+ return []
405
+ options: list[tuple[str, str]] = []
406
+ for row in ranked.to_dict("records"):
407
+ label = f"{row['member']} -> {row['counterparty / sector']} (score {row['overall score']})"
408
+ options.append((label, str(row["relationship_id"])))
409
+ return options
410
+
411
+
412
+ def _select_edge_row(edges: pd.DataFrame, relationship_id: str) -> Dict[str, Any] | None:
413
+ if edges.empty or not relationship_id:
414
+ return None
415
+ matched = edges[edges["edge_id"] == relationship_id]
416
+ if matched.empty:
417
+ return None
418
+ return matched.head(1).to_dict("records")[0]
419
+
420
+
421
def _relationship_detail_markdown(edges: pd.DataFrame, relationship_id: str) -> str:
    """Render a Markdown explanation of one selected relationship.

    Looks up the edge via ``_select_edge_row``. When nothing is selected or
    found, returns a short prompt. Otherwise lists the labels, counts, score,
    evidence signals and time-window text for the edge, followed by optional
    sections for plain-language reason codes (up to 8) and example source
    URLs (up to 5).
    """
    edge = _select_edge_row(edges, relationship_id)
    if not edge:
        return "Select a relationship to inspect why it appears in this released slice."

    family = str(edge.get("relationship_family", "") or "")
    evidence_chips = _edge_evidence_chips(edge)
    reasons = [
        _plain_reason_code(code)
        for code in _split_pipe_values(edge.get("reason_codes", ""), limit=8)
    ]
    example_urls = _split_pipe_values(edge.get("source_urls", ""), limit=5)
    is_recipient = family == "recipient"

    out: list[str] = []
    out.append(f"### {edge.get('member_name') or edge.get('member_slug')} -> {edge.get('target_label')}")
    out.append("")
    out.append(f"- Relationship view: `{_plain_family_label(family)}`")
    out.append(f"- Strength label: `{_plain_status_label(str(edge.get('relationship_status', '') or ''))}`")
    out.append(f"- Overall score: `{_relationship_score(edge)}`")
    out.append(f"- Supporting relationship rows: `{int(edge.get('link_count', 0) or 0)}`")

    # Recipient rows and other families keep their counts under different keys.
    if is_recipient:
        stronger_rows = int(edge.get("linked_count", 0) or 0)
    else:
        stronger_rows = int(edge.get("strong_event_count", 0) or 0)
    out.append(f"- Stronger-support rows: `{stronger_rows}`")

    if is_recipient:
        caution_rows = int(edge.get("review_count", 0) or 0)
    else:
        caution_rows = int(edge.get("weak_event_count", 0) or 0)
    out.append(f"- Caution / weaker rows: `{caution_rows}`")

    out.append(f"- Unresolved source refs still counted: `{int(edge.get('unresolved_source_ref_count', 0) or 0)}`")
    chip_text = ", ".join(evidence_chips) if evidence_chips else "published source support"
    out.append(f"- Evidence signals: `{chip_text}`")
    out.append(f"- Time-window overlap: `{_window_overlap_text(edge)}`")

    if reasons:
        out += ["", "#### Why it is linked in this slice", ""]
        out += [f"- {reason}" for reason in reasons]
    if example_urls:
        out += ["", "#### Example published source URLs", ""]
        out += [f"- {link}" for link in example_urls]
    return "\n".join(out)
449
+
450
+
451
+ def _timeline_window_from_url(url: str) -> tuple[int, str, str]:
452
+ normalized = str(url or "").strip()
453
+ if not normalized:
454
+ return (99, "Published source", "No public URL attached in this row")
455
+ if "/ptr-pdfs/" in normalized or "/financial-pdfs/" in normalized:
456
+ match = re.search(r"/(\d{4})/", normalized)
457
+ year_label = match.group(1) if match else "Disclosure year"
458
+ kind = "Trade disclosure" if "/ptr-pdfs/" in normalized else "Annual disclosure"
459
+ return (10, year_label, kind)
460
+ if "BILLSTATUS-118" in normalized:
461
+ return (20, "2023-2024", "Bill and vote records (118th Congress)")
462
+ if "BILLSTATUS-119" in normalized:
463
+ return (30, "2025-2026", "Bill and vote records (119th Congress)")
464
+ if "usaspending.gov/award/" in normalized:
465
+ return (40, "Published award record", "Federal award record")
466
+ if "committee_info" in normalized:
467
+ return (50, "Current committee reference", "Committee roster")
468
+ return (60, "Published source", urlparse(normalized).netloc if normalized.startswith("http") else "Published source")
469
+
470
+
471
def _relationship_timeline_html(edges: pd.DataFrame, relationship_id: str) -> str:
    """Render an HTML "evidence window" card list for one relationship.

    Each of up to eight source URLs on the selected edge becomes an entry
    classified by ``_timeline_window_from_url``. Extra entries are added for
    profile-based support and for unresolved source references. Entries are
    sorted, capped at eight, HTML-escaped and wrapped in a styled container.
    A placeholder box is returned when no edge is selected or no entries
    exist.
    """
    edge = _select_edge_row(edges, relationship_id)
    if not edge:
        return '<div style="padding: 1rem; border: 1px solid #d6d0c4; background: #fffdf8; color: #3a3a3a;">Choose a relationship to see its evidence window.</div>'

    timeline: list[tuple[int, str, str, str]] = []
    seen_keys: set[tuple[str, str, str]] = set()
    for source_url in _split_pipe_values(edge.get("source_urls", ""), limit=8):
        order, window_label, track_label = _timeline_window_from_url(source_url)
        key = (window_label, track_label, source_url)
        if key in seen_keys:
            continue
        seen_keys.add(key)
        timeline.append((order, window_label, track_label, source_url))

    if int(edge.get("profile_link_count", 0) or 0) > 0:
        timeline.append(
            (
                70,
                "Undated support",
                "Member profile support",
                "Profile-based support is included in this relationship summary.",
            )
        )
    unresolved = int(edge.get("unresolved_source_ref_count", 0) or 0)
    if unresolved > 0:
        timeline.append(
            (
                80,
                "Partly unresolved",
                "Some official references remain unresolved",
                f"{unresolved} unresolved refs are still counted in this released row.",
            )
        )

    if not timeline:
        return '<div style="padding: 1rem; border: 1px solid #d6d0c4; background: #fffdf8; color: #3a3a3a;">No evidence-window entries are available for this relationship.</div>'

    # Plain tuple ordering: sort key first, then the three text fields.
    timeline.sort()

    card_html = "".join(
        '<div style="display:flex; gap:16px; align-items:flex-start; margin:0 0 16px 0;">'
        + f'<div style="min-width:120px; font-weight:700; color:#6b4e16;">{html.escape(window_label)}</div>'
        + '<div style="border-left:3px solid #c08d2e; padding-left:14px;">'
        + f'<div style="font-weight:700; color:#1f2b2d;">{html.escape(track_label)}</div>'
        + f'<div style="color:#3d3d3d; margin-top:4px;">{html.escape(detail)}</div>'
        + "</div>"
        + "</div>"
        for _, window_label, track_label, detail in timeline[:8]
    )
    container_open = '<div style="border:1px solid #d6d0c4; border-radius:12px; background:#fffdf8; padding:16px;">'
    title = '<div style="font-weight:700; margin-bottom:10px; color:#1f2b2d;">Why this relationship appears</div>'
    caveat = '<div style="color:#5c5c5c; margin-bottom:14px;">This is a coarse evidence window based on the time hints published in this release. It is not exact chronology.</div>'
    return container_open + title + caveat + card_html + "</div>"
509
+
510
+
511
def _graph_table(edges: pd.DataFrame) -> pd.DataFrame:
    """Return the ranked relationships trimmed to the graph-tab columns.

    Delegates ranking to ``_rank_relationships``. An empty ranking is returned
    unchanged; otherwise only the display columns listed below are kept.
    """
    display_columns = [
        "rank",
        "member",
        "counterparty / sector",
        "overall score",
        "strength",
        "evidence",
        "time-window overlap",
        "supporting rows",
    ]
    ranked = _rank_relationships(edges)
    return ranked if ranked.empty else ranked[display_columns]
527
 
528
 
529
  def _filter_events(events: pd.DataFrame, member_query: str, event_type: str, score_label: str, text_query: str) -> pd.DataFrame:
 
779
  def build_app(copy_path: str | Path):
780
  data = load_release_data(copy_path)
781
  events = data["events"]
 
782
  nodes = data["graph_nodes"]
783
  edges = data["graph_edges"]
784
  provenance = data["event_provenance"]
 
806
  event_id_choices = sorted(events["event_id"].dropna().unique().tolist())
807
  graph_defaults = data["graph_config"].get("default_filters") or {}
808
  overview_member_limit = int(graph_defaults.get("overview_member_limit", 8))
809
+ default_member_search = str(graph_defaults.get("default_member_search", "") or "")
810
 
811
  with gr.Blocks(title=copy_payload.get("title", "Congress Public Records Slice")) as app:
812
  gr.Markdown(copy_payload.get("welcome_markdown", copy_payload.get("landing_markdown", "")))
813
+ with gr.Tab("Overview"):
814
+ gr.Markdown(
815
+ "### Start here\n\n"
816
+ "Pick one House member, choose whether you want sectors or funding recipients, and read the ranked list first."
817
+ )
818
+ with gr.Row():
819
+ overview_member = gr.Textbox(label="House member", value=default_member_search)
820
+ overview_family = gr.Dropdown(label="Show", choices=[("Sectors", "sector"), ("Funding recipients", "recipient")], value="sector")
821
+ overview_only_strong = gr.Checkbox(label="Only strong links", value=True)
822
+ overview_top_n = gr.Slider(label="Show top relationships", minimum=5, maximum=40, step=5, value=10)
823
+ if example_member_choices:
824
+ gr.Examples(examples=example_member_choices, inputs=[overview_member], label="Try one of these example members")
825
+ overview_summary_md = gr.Markdown()
826
+ overview_df = gr.Dataframe(interactive=False)
827
+ relationship_choice = gr.Dropdown(label="Relationship to explain", choices=[], value=None)
828
+ overview_detail_md = gr.Markdown()
829
+ overview_timeline_html = gr.HTML()
830
+
831
+ def _overview_edges(member_query: str, family: str, only_strong: bool, top_n: int) -> pd.DataFrame:
832
+ return _filter_graph(
833
+ edges,
834
+ family,
835
+ member_query,
836
+ "",
837
+ "all",
838
+ "stronger" if only_strong else "all",
839
+ True,
840
+ top_n,
841
+ overview_member_limit,
842
+ )
843
+
844
+ def _update_overview(member_query: str, family: str, only_strong: bool, top_n: int):
845
+ filtered_edges = _overview_edges(member_query, family, only_strong, top_n)
846
+ ranked = _rank_relationships(filtered_edges)
847
+ options = _relationship_options(ranked)
848
+ selected = options[0][1] if options else None
849
+ display = ranked.drop(columns=["relationship_id", "source_examples"], errors="ignore")
850
+ return (
851
+ _overview_summary_markdown(
852
+ ranked,
853
+ member_query=member_query,
854
+ family=family,
855
+ only_strong_links=only_strong,
856
+ top_n=top_n,
857
+ ),
858
+ display,
859
+ gr.update(choices=options, value=selected),
860
+ _relationship_detail_markdown(filtered_edges, selected or ""),
861
+ _relationship_timeline_html(filtered_edges, selected or ""),
862
+ )
863
+
864
+ def _update_overview_detail(member_query: str, family: str, only_strong: bool, top_n: int, relationship_id: str):
865
+ filtered_edges = _overview_edges(member_query, family, only_strong, top_n)
866
+ return _relationship_detail_markdown(filtered_edges, relationship_id), _relationship_timeline_html(filtered_edges, relationship_id)
867
+
868
+ for control in (overview_member, overview_family, overview_only_strong, overview_top_n):
869
+ control.change(
870
+ _update_overview,
871
+ [overview_member, overview_family, overview_only_strong, overview_top_n],
872
+ [overview_summary_md, overview_df, relationship_choice, overview_detail_md, overview_timeline_html],
873
+ )
874
+ relationship_choice.change(
875
+ _update_overview_detail,
876
+ [overview_member, overview_family, overview_only_strong, overview_top_n, relationship_choice],
877
+ [overview_detail_md, overview_timeline_html],
878
+ )
879
+ app.load(
880
+ _update_overview,
881
+ [overview_member, overview_family, overview_only_strong, overview_top_n],
882
+ [overview_summary_md, overview_df, relationship_choice, overview_detail_md, overview_timeline_html],
883
+ )
884
+ with gr.Tab("Explore Graph (optional)"):
885
  gr.Markdown(_graph_intro_markdown(data["graph_config"]))
886
  with gr.Row():
887
+ family = gr.Dropdown(label="Show", choices=graph_family_choices, value=str(graph_defaults.get("relationship_family", "sector")))
888
+ member_graph_query = gr.Textbox(label="House member to focus", value=default_member_search)
889
  target_query = gr.Textbox(label="Recipient or sector search")
890
  graph_score = gr.Dropdown(label="Score label", choices=graph_score_choices, value="all")
891
+ review_status = gr.Dropdown(label="Which links to show", choices=graph_status_choices, value=str(graph_defaults.get("review_status", "stronger")))
892
  if example_member_choices:
893
  gr.Examples(examples=example_member_choices, inputs=[member_graph_query], label="Try one of these example members")
894
  with gr.Row():
895
+ hide_unresolved_only = gr.Checkbox(label="Hide unresolved links", value=bool(graph_defaults.get("hide_unresolved_only", True)))
896
+ max_edges = gr.Slider(label="Show top relationships", minimum=25, maximum=300, step=25, value=int(graph_defaults.get("max_edges", 60)))
897
  graph_summary_md = gr.Markdown()
898
  graph_html = gr.HTML()
899
+ gr.Markdown("#### Relationship list for this graph view")
900
  graph_df = gr.Dataframe(interactive=False)
901
  def _update_graph(family: str, member_graph_query: str, target_query: str, graph_score: str, review_status: str, hide_unresolved_only: bool, max_edges: int):
902
  filtered_edges = _filter_graph(edges, family, member_graph_query, target_query, graph_score, review_status, hide_unresolved_only, max_edges, overview_member_limit)
 
913
  for control in (family, member_graph_query, target_query, graph_score, review_status, hide_unresolved_only, max_edges):
914
  control.change(_update_graph, [family, member_graph_query, target_query, graph_score, review_status, hide_unresolved_only, max_edges], [graph_summary_md, graph_html, graph_df])
915
  app.load(_update_graph, [family, member_graph_query, target_query, graph_score, review_status, hide_unresolved_only, max_edges], [graph_summary_md, graph_html, graph_df])
916
+ with gr.Tab("Search Events"):
917
  with gr.Row():
918
  member_query = gr.Textbox(label="Member name or slug")
919
  event_type = gr.Dropdown(label="Event type", choices=event_type_choices, value="all")