cjc0013 committed on
Commit
47078c4
·
verified ·
1 Parent(s): e94095f

Improve drone Space map presentation

Browse files

Research-steered map-first update: default grouped markers by coordinate/site, marker size by case count, strongest-evidence coloring, coordinate-quality symbols, repeated-marker mode, and source-linked marker detail panels.

Files changed (3) hide show
  1. README.md +1 -1
  2. public_space_app.py +293 -98
  3. space_manifest.json +6 -6
README.md CHANGED
@@ -13,4 +13,4 @@ python_version: 3.11
13
 
14
  Map-first review surface for public-source reports about mystery, unidentified, suspicious, or unauthorized drone activity around sensitive sites.
15
 
16
- This Space plots the expanded case set on a world map, with evidence tiers, source links, coordinate-quality labels, country/site filters, and row-level claim boundaries. It does not claim that any row proves threat, attribution, anomalous origin, or hostile intent.
 
13
 
14
  Map-first review surface for public-source reports about mystery, unidentified, suspicious, or unauthorized drone activity around sensitive sites.
15
 
16
+ The default map groups repeated reports by coordinate/site so dense clusters are readable. Marker size is case count, color is strongest evidence tier, and symbol is coordinate quality. Selecting a marker opens the source-linked cases behind it.
public_space_app.py CHANGED
@@ -8,7 +8,18 @@ import pandas as pd
8
  import plotly.express as px
9
 
10
 
11
- DISPLAY_COLUMNS = [
 
 
 
 
 
 
 
 
 
 
 
12
  "case_rank",
13
  "evidence_tier",
14
  "report_date",
@@ -20,31 +31,55 @@ DISPLAY_COLUMNS = [
20
  "source_domain",
21
  "followup_status",
22
  ]
 
 
 
 
 
 
 
 
 
 
 
23
 
24
 
25
  def _load_data(data_dir: Path) -> tuple[pd.DataFrame, dict, dict]:
26
  cases = pd.read_csv(data_dir / "mystery_drone_sensitive_site_cases.csv").fillna("")
27
  manifest = json.loads((data_dir / "release_manifest.json").read_text(encoding="utf-8"))
28
  quality = json.loads((data_dir / "quality_report.json").read_text(encoding="utf-8"))
 
 
 
29
  cases["report_year"] = cases["report_date"].astype(str).str.slice(0, 4).replace("", "unknown")
 
 
 
 
 
 
 
 
 
 
 
 
30
  return cases, manifest, quality
31
 
32
 
33
  def _markdown_header(manifest: dict, quality: dict) -> str:
34
  tiers = manifest.get("counts_by_evidence_tier", {})
35
- countries = manifest.get("counts_by_country", {})
36
- top_countries = ", ".join(f"{key}: {value}" for key, value in list(countries.items())[:7])
37
  return f"""# Mystery Drone Reports Around Sensitive Sites
38
 
39
- Map-first public review surface for mystery, unidentified, suspicious, or unauthorized drone reports around military, airport, maritime, emergency-service, and critical-infrastructure contexts.
40
 
41
- **{manifest.get("case_count", 0)} cases** | **{manifest.get("probable_cluster_count", 0)} probable clusters** | **release gate: {"pass" if quality.get("release_grade") else "review"}**
42
 
43
- Evidence tiers: resolved sensitive-site reports `{tiers.get("resolved_sensitive_site_report", 0)}`, named-site cases `{tiers.get("named_sensitive_site_report", 0)}`, source-discovered reports `{tiers.get("source_discovered_report", 0)}`.
44
 
45
- Country coverage: {top_countries}
46
 
47
- Rows are source-indexed report cases, not verified findings of threat, attribution, anomalous origin, or hostile intent. Map points use the best public coordinate available: site centroid, city/region centroid, country centroid, or a clearly labeled fallback.
48
  """
49
 
50
 
@@ -52,20 +87,54 @@ def _options(values: pd.Series) -> list[str]:
52
  return sorted(str(value) for value in values.dropna().astype(str).unique() if str(value))
53
 
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  def _filter_cases(
56
  cases: pd.DataFrame,
57
  evidence_tiers: list[str] | None,
 
58
  countries: list[str] | None,
59
  site_types: list[str] | None,
 
 
60
  query: str,
61
  ) -> pd.DataFrame:
62
  filtered = cases.copy()
63
  if evidence_tiers:
64
  filtered = filtered[filtered["evidence_tier"].isin(evidence_tiers)]
 
 
65
  if countries:
66
  filtered = filtered[filtered["country"].isin(countries)]
67
  if site_types:
68
  filtered = filtered[filtered["site_type"].isin(site_types)]
 
 
 
 
69
  query = str(query or "").strip().lower()
70
  if query:
71
  haystack = (
@@ -73,6 +142,8 @@ def _filter_cases(
73
  + " "
74
  + filtered["site_name"].astype(str)
75
  + " "
 
 
76
  + filtered["country"].astype(str)
77
  + " "
78
  + filtered["source_domain"].astype(str)
@@ -81,133 +152,257 @@ def _filter_cases(
81
  return filtered.sort_values(["case_rank"]).reset_index(drop=True)
82
 
83
 
84
- def _summary_text(filtered: pd.DataFrame) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  if filtered.empty:
86
  return "No rows match the current filters."
87
- tier_counts = filtered["evidence_tier"].value_counts().to_dict()
88
- country_counts = filtered["country"].value_counts().head(8).to_dict()
89
- tiers = ", ".join(f"{key}: {value}" for key, value in tier_counts.items())
90
- countries = ", ".join(f"{key}: {value}" for key, value in country_counts.items())
91
- return f"Showing {len(filtered)} cases. Evidence tiers: {tiers}. Top countries: {countries}."
 
 
 
92
 
93
 
94
- def _map(filtered: pd.DataFrame):
95
- if filtered.empty:
96
- fig = px.scatter_geo(pd.DataFrame({"plot_lat": [], "plot_lon": []}), lat="plot_lat", lon="plot_lon", height=620)
97
  fig.update_layout(margin={"l": 0, "r": 0, "t": 20, "b": 0})
98
  return fig
99
- plot_rows = filtered.copy()
100
- plot_rows["plot_lat"] = pd.to_numeric(plot_rows["plot_lat"], errors="coerce")
101
- plot_rows["plot_lon"] = pd.to_numeric(plot_rows["plot_lon"], errors="coerce")
102
- plot_rows = plot_rows.dropna(subset=["plot_lat", "plot_lon"])
103
  fig = px.scatter_geo(
104
- plot_rows,
105
  lat="plot_lat",
106
  lon="plot_lon",
107
- color="evidence_tier",
108
  symbol="coordinate_quality",
109
- hover_name="headline",
 
 
110
  hover_data={
111
- "case_rank": True,
112
- "site_name": True,
113
- "plot_label": True,
114
  "country": True,
115
- "report_date": True,
116
- "source_domain": True,
 
117
  "coordinate_quality": True,
118
  "plot_lat": False,
119
  "plot_lon": False,
120
  },
121
  projection="natural earth",
122
- height=660,
 
 
 
 
 
123
  )
124
- fig.update_traces(marker={"size": 9, "opacity": 0.78, "line": {"width": 0.4, "color": "white"}})
125
  fig.update_geos(showland=True, landcolor="#eef2f5", showocean=True, oceancolor="#dfeaf2", showcountries=True)
126
- fig.update_layout(margin={"l": 0, "r": 0, "t": 24, "b": 0}, legend_orientation="h")
 
 
 
 
127
  return fig
128
 
129
 
130
- def _table(filtered: pd.DataFrame) -> pd.DataFrame:
131
- return filtered[DISPLAY_COLUMNS].copy()
 
 
 
 
132
 
133
 
134
- def _render(cases: pd.DataFrame, evidence_tiers, countries, site_types, query):
135
- filtered = _filter_cases(cases, evidence_tiers, countries, site_types, query)
136
- rows = filtered.to_dict("records")
137
- return _summary_text(filtered), _map(filtered), _table(filtered), rows, _detail(rows, 0)
138
-
139
-
140
- def _detail(rows: list[dict], index: int | None) -> str:
141
- if not rows:
142
- return "No case selected."
143
  try:
144
- row = rows[int(index or 0)]
145
  except (IndexError, TypeError, ValueError):
146
- row = rows[0]
147
- return f"""### {row.get("headline", "")}
148
-
149
- - Evidence tier: `{row.get("evidence_tier", "")}`
150
- - Follow-up status: `{row.get("followup_status", "")}`
151
- - Report date: `{row.get("report_date", "")}` (`{row.get("date_quality", "")}`)
152
- - Site signal: `{row.get("site_name", "")}` / `{row.get("site_type", "")}`
153
- - Map point: `{row.get("plot_label", "")}` / `{row.get("coordinate_quality", "")}`
154
- - Location signal: `{row.get("country", "")}` `{row.get("state_region", "")}`
155
- - Source: [{row.get("publisher", "") or row.get("source_domain", "")}]({row.get("source_url", "")})
156
- - Boundary: {row.get("claim_boundary", "")}
157
- - Row hash: `{row.get("public_row_sha256", "")}`
158
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
 
161
  def build_app(data_dir: str | Path):
162
  data_dir = Path(data_dir)
163
  cases, manifest, quality = _load_data(data_dir)
 
 
164
  with gr.Blocks(title="Mystery Drone Reports Around Sensitive Sites") as app:
165
  gr.Markdown(_markdown_header(manifest, quality))
166
  with gr.Row():
167
- evidence_filter = gr.CheckboxGroup(
168
- choices=_options(cases["evidence_tier"]),
169
- value=_options(cases["evidence_tier"]),
170
- label="Evidence tier",
171
  )
172
- country_filter = gr.Dropdown(
173
- choices=_options(cases["country"]),
174
- value=[],
175
- multiselect=True,
176
- label="Country",
177
- )
178
- site_filter = gr.Dropdown(
179
- choices=_options(cases["site_type"]),
180
- value=[],
181
- multiselect=True,
182
- label="Site type",
183
- )
184
- query = gr.Textbox(label="Search", placeholder="Try Langley, Copenhagen, airport, military base")
185
  summary = gr.Markdown()
186
- map_plot = gr.Plot(label="Case map")
187
- table = gr.Dataframe(label="Filtered cases", interactive=False)
188
- rows_state = gr.State([])
189
- detail = gr.Markdown()
190
-
191
- def render(evidence_tiers, countries, site_types, search_query):
192
- return _render(cases, evidence_tiers, countries, site_types, search_query)
193
-
194
- for control in (evidence_filter, country_filter, site_filter, query):
195
- control.change(
196
- render,
197
- inputs=[evidence_filter, country_filter, site_filter, query],
198
- outputs=[summary, map_plot, table, rows_state, detail],
199
- )
200
-
201
- def select_detail(rows, evt: gr.SelectData):
 
 
 
 
 
 
 
 
 
 
 
 
202
  if not evt or evt.index is None:
203
- return _detail(rows, 0)
204
  row_index = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
205
- return _detail(rows, row_index)
206
 
207
- table.select(select_detail, inputs=rows_state, outputs=detail)
208
- app.load(
209
- render,
210
- inputs=[evidence_filter, country_filter, site_filter, query],
211
- outputs=[summary, map_plot, table, rows_state, detail],
212
- )
213
  return app
 
8
  import plotly.express as px
9
 
10
 
11
+ GROUP_COLUMNS = [
12
+ "case_count",
13
+ "strongest_evidence_tier",
14
+ "plot_label",
15
+ "coordinate_quality",
16
+ "country",
17
+ "date_span",
18
+ "probable_cluster_count",
19
+ "evidence_mix",
20
+ "top_source_domains",
21
+ ]
22
+ CASE_COLUMNS = [
23
  "case_rank",
24
  "evidence_tier",
25
  "report_date",
 
31
  "source_domain",
32
  "followup_status",
33
  ]
34
+ TIER_RANK = {
35
+ "resolved_sensitive_site_report": 0,
36
+ "named_sensitive_site_report": 1,
37
+ "source_discovered_report": 2,
38
+ }
39
+ TIER_LABEL = {
40
+ "resolved_sensitive_site_report": "resolved site report",
41
+ "named_sensitive_site_report": "named-site report",
42
+ "source_discovered_report": "source-discovered report",
43
+ }
44
+ COARSE_COORDINATE_QUALITIES = {"region_centroid", "country_centroid", "city_area_centroid"}
45
 
46
 
47
  def _load_data(data_dir: Path) -> tuple[pd.DataFrame, dict, dict]:
48
  cases = pd.read_csv(data_dir / "mystery_drone_sensitive_site_cases.csv").fillna("")
49
  manifest = json.loads((data_dir / "release_manifest.json").read_text(encoding="utf-8"))
50
  quality = json.loads((data_dir / "quality_report.json").read_text(encoding="utf-8"))
51
+ cases["case_rank"] = pd.to_numeric(cases["case_rank"], errors="coerce").fillna(999999).astype(int)
52
+ cases["plot_lat"] = pd.to_numeric(cases["plot_lat"], errors="coerce")
53
+ cases["plot_lon"] = pd.to_numeric(cases["plot_lon"], errors="coerce")
54
  cases["report_year"] = cases["report_date"].astype(str).str.slice(0, 4).replace("", "unknown")
55
+ cases["map_group_id"] = cases.apply(
56
+ lambda row: "|".join(
57
+ [
58
+ f"{float(row['plot_lat']):.4f}" if pd.notna(row["plot_lat"]) else "",
59
+ f"{float(row['plot_lon']):.4f}" if pd.notna(row["plot_lon"]) else "",
60
+ str(row.get("plot_label", "")),
61
+ str(row.get("coordinate_quality", "")),
62
+ str(row.get("country", "")),
63
+ ]
64
+ ),
65
+ axis=1,
66
+ )
67
  return cases, manifest, quality
68
 
69
 
70
  def _markdown_header(manifest: dict, quality: dict) -> str:
71
  tiers = manifest.get("counts_by_evidence_tier", {})
 
 
72
  return f"""# Mystery Drone Reports Around Sensitive Sites
73
 
74
+ Map-first review surface for public-source reports around military, airport, maritime, emergency-service, and critical-infrastructure contexts.
75
 
76
+ **{manifest.get("case_count", 0)} cases** | **{manifest.get("probable_cluster_count", 0)} probable clusters** | **{quality.get("mappable_case_count", 0)} mapped rows** | **release gate: {"pass" if quality.get("release_grade") else "review"}**
77
 
78
+ Default view groups repeated reports at the same public coordinate so dense clusters, like New Jersey, read as one place-marker with a case list instead of a pile of overlapping dots.
79
 
80
+ Evidence tiers: resolved site `{tiers.get("resolved_sensitive_site_report", 0)}`, named site `{tiers.get("named_sensitive_site_report", 0)}`, source-discovered `{tiers.get("source_discovered_report", 0)}`.
81
 
82
+ Map points are source-indexed report locations, not verified findings of threat, attribution, anomalous origin, or hostile intent. `coordinate_quality` tells you whether a marker is a site, city, region, or country centroid.
83
  """
84
 
85
 
 
87
  return sorted(str(value) for value in values.dropna().astype(str).unique() if str(value))
88
 
89
 
90
+ def _strongest_tier(values: pd.Series) -> str:
91
+ tiers = [str(value) for value in values if str(value)]
92
+ if not tiers:
93
+ return "source_discovered_report"
94
+ return sorted(tiers, key=lambda value: TIER_RANK.get(value, 99))[0]
95
+
96
+
97
+ def _count_text(values: pd.Series, *, label_map: dict[str, str] | None = None, limit: int = 4) -> str:
98
+ counts = values.astype(str).replace("", "unknown").value_counts()
99
+ parts = []
100
+ for key, value in counts.head(limit).items():
101
+ label = label_map.get(key, key) if label_map else key
102
+ parts.append(f"{label}: {int(value)}")
103
+ return "; ".join(parts)
104
+
105
+
106
+ def _date_span(values: pd.Series) -> str:
107
+ dates = sorted(str(value) for value in values if str(value))
108
+ if not dates:
109
+ return "undated"
110
+ if dates[0] == dates[-1]:
111
+ return dates[0]
112
+ return f"{dates[0]} to {dates[-1]}"
113
+
114
+
115
  def _filter_cases(
116
  cases: pd.DataFrame,
117
  evidence_tiers: list[str] | None,
118
+ coordinate_qualities: list[str] | None,
119
  countries: list[str] | None,
120
  site_types: list[str] | None,
121
+ source_domains: list[str] | None,
122
+ years: list[str] | None,
123
  query: str,
124
  ) -> pd.DataFrame:
125
  filtered = cases.copy()
126
  if evidence_tiers:
127
  filtered = filtered[filtered["evidence_tier"].isin(evidence_tiers)]
128
+ if coordinate_qualities:
129
+ filtered = filtered[filtered["coordinate_quality"].isin(coordinate_qualities)]
130
  if countries:
131
  filtered = filtered[filtered["country"].isin(countries)]
132
  if site_types:
133
  filtered = filtered[filtered["site_type"].isin(site_types)]
134
+ if source_domains:
135
+ filtered = filtered[filtered["source_domain"].isin(source_domains)]
136
+ if years:
137
+ filtered = filtered[filtered["report_year"].isin(years)]
138
  query = str(query or "").strip().lower()
139
  if query:
140
  haystack = (
 
142
  + " "
143
  + filtered["site_name"].astype(str)
144
  + " "
145
+ + filtered["plot_label"].astype(str)
146
+ + " "
147
  + filtered["country"].astype(str)
148
  + " "
149
  + filtered["source_domain"].astype(str)
 
152
  return filtered.sort_values(["case_rank"]).reset_index(drop=True)
153
 
154
 
155
def _group_cases(filtered: pd.DataFrame) -> pd.DataFrame:
    """Collapse case rows into one marker row per ``map_group_id``.

    Each output row carries the group's case count, its strongest evidence
    tier, the coordinate/label/quality/country of the group's first row, the
    date span, the number of distinct ``probable_cluster_id`` values, and
    short count summaries for evidence tier, source domain and site type.

    Rows are ordered by case count (largest first), then by evidence
    strength according to ``TIER_RANK`` (strongest first), then by
    ``plot_label``. An empty input returns an empty frame that already has
    the grouped column names.
    """
    if filtered.empty:
        return pd.DataFrame(columns=GROUP_COLUMNS + ["map_group_id", "plot_lat", "plot_lon"])

    rows: list[dict] = []
    for group_id, group in filtered.groupby("map_group_id", sort=False):
        rows.append(
            {
                "map_group_id": group_id,
                "case_count": int(len(group)),
                "strongest_evidence_tier": _strongest_tier(group["evidence_tier"]),
                "plot_lat": float(group["plot_lat"].iloc[0]),
                "plot_lon": float(group["plot_lon"].iloc[0]),
                "plot_label": str(group["plot_label"].iloc[0]),
                "coordinate_quality": str(group["coordinate_quality"].iloc[0]),
                "country": str(group["country"].iloc[0]),
                "date_span": _date_span(group["report_date"]),
                # NOTE(review): a blank probable_cluster_id counts as one
                # distinct value here — confirm that is intended.
                "probable_cluster_count": int(group["probable_cluster_id"].nunique()),
                "evidence_mix": _count_text(group["evidence_tier"], label_map=TIER_LABEL),
                "top_source_domains": _count_text(group["source_domain"], limit=3),
                "site_types": _count_text(group["site_type"], limit=3),
            }
        )

    # Tie-break on evidence *strength*, not on the tier's spelling: sorting
    # the tier column alphabetically would place "named_..." ahead of
    # "resolved_...", contradicting TIER_RANK.
    rows.sort(
        key=lambda row: (
            -row["case_count"],
            TIER_RANK.get(row["strongest_evidence_tier"], 99),
            row["plot_label"],
        )
    )
    return pd.DataFrame(rows)
184
+
185
+
186
+ def _marker_rows(filtered: pd.DataFrame, mode: str, repeated_only: bool) -> pd.DataFrame:
187
+ working = filtered.copy()
188
+ if mode == "Coarse-location review":
189
+ working = working[working["coordinate_quality"].isin(COARSE_COORDINATE_QUALITIES)]
190
+ if mode == "Individual cases":
191
+ group_sizes = working["map_group_id"].value_counts().to_dict()
192
+ if repeated_only:
193
+ working = working[working["map_group_id"].map(group_sizes).fillna(0) > 1]
194
+ markers = working.copy()
195
+ markers["case_count"] = 1
196
+ markers["strongest_evidence_tier"] = markers["evidence_tier"]
197
+ markers["date_span"] = markers["report_date"]
198
+ markers["probable_cluster_count"] = 1
199
+ markers["evidence_mix"] = markers["evidence_tier"].map(lambda value: TIER_LABEL.get(str(value), str(value)))
200
+ markers["top_source_domains"] = markers["source_domain"]
201
+ return markers.sort_values(["case_rank"]).reset_index(drop=True)
202
+ grouped = _group_cases(working)
203
+ if repeated_only and not grouped.empty:
204
+ grouped = grouped[grouped["case_count"] > 1].reset_index(drop=True)
205
+ return grouped
206
+
207
+
208
+ def _summary_text(filtered: pd.DataFrame, markers: pd.DataFrame, mode: str) -> str:
209
  if filtered.empty:
210
  return "No rows match the current filters."
211
+ precise_count = int((filtered["coordinate_quality"] == "site_centroid").sum())
212
+ grouped_count = int(len(markers))
213
+ largest_stack = int(markers["case_count"].max()) if "case_count" in markers and not markers.empty else 0
214
+ return (
215
+ f"Showing {len(filtered)} cases as {grouped_count} map markers in `{mode}` mode. "
216
+ f"{precise_count} cases use site centroids; the largest visible marker groups {largest_stack} cases. "
217
+ "Marker size is case count; color is strongest evidence tier; symbol is coordinate quality."
218
+ )
219
 
220
 
221
def _map(markers: pd.DataFrame, mode: str):
    """Draw the marker frame as a Plotly geo scatter figure.

    With no markers, an empty geo figure of the same height is returned.
    Otherwise markers are colored by ``strongest_evidence_tier``, shaped by
    ``coordinate_quality`` and sized by ``case_count``; the maximum marker
    size is smaller in ``"Individual cases"`` mode than in the other modes.
    """
    figure_height = 690

    if markers.empty:
        placeholder = pd.DataFrame({"plot_lat": [], "plot_lon": []})
        empty_fig = px.scatter_geo(placeholder, lat="plot_lat", lon="plot_lon", height=figure_height)
        empty_fig.update_layout(margin={"l": 0, "r": 0, "t": 20, "b": 0})
        return empty_fig

    # Fields listed in the hover box, in display order; the raw coordinates
    # are explicitly hidden.
    shown_fields = (
        "case_count",
        "probable_cluster_count",
        "country",
        "date_span",
        "evidence_mix",
        "top_source_domains",
        "coordinate_quality",
    )
    hover_fields = {name: True for name in shown_fields}
    hover_fields["plot_lat"] = False
    hover_fields["plot_lon"] = False

    tier_colors = {
        "resolved_sensitive_site_report": "#b42318",
        "named_sensitive_site_report": "#b76e00",
        "source_discovered_report": "#2b6f9e",
    }

    if mode != "Individual cases":
        largest_marker = 36
    else:
        largest_marker = 14

    fig = px.scatter_geo(
        markers,
        lat="plot_lat",
        lon="plot_lon",
        color="strongest_evidence_tier",
        symbol="coordinate_quality",
        size="case_count",
        size_max=largest_marker,
        hover_name="plot_label",
        hover_data=hover_fields,
        projection="natural earth",
        height=figure_height,
        color_discrete_map=tier_colors,
    )
    fig.update_traces(marker={"opacity": 0.78, "line": {"width": 0.6, "color": "white"}})
    fig.update_geos(
        showland=True,
        landcolor="#eef2f5",
        showocean=True,
        oceancolor="#dfeaf2",
        showcountries=True,
    )
    fig.update_layout(
        margin={"l": 0, "r": 0, "t": 24, "b": 0},
        legend_orientation="h",
        legend_title_text="Evidence tier / coordinate quality",
    )
    return fig
262
 
263
 
264
+ def _cases_for_marker(marker: dict, filtered_rows: list[dict], mode: str) -> list[dict]:
265
+ if mode == "Individual cases":
266
+ case_id = marker.get("case_id")
267
+ return [row for row in filtered_rows if row.get("case_id") == case_id]
268
+ group_id = marker.get("map_group_id")
269
+ return [row for row in filtered_rows if row.get("map_group_id") == group_id]
270
 
271
 
272
+ def _detail(markers: list[dict], filtered_rows: list[dict], index: int | None, mode: str) -> str:
273
+ if not markers:
274
+ return "No map marker selected."
 
 
 
 
 
 
275
  try:
276
+ marker = markers[int(index or 0)]
277
  except (IndexError, TypeError, ValueError):
278
+ marker = markers[0]
279
+ marker_cases = _cases_for_marker(marker, filtered_rows, mode)
280
+ marker_cases = sorted(
281
+ marker_cases,
282
+ key=lambda row: (
283
+ TIER_RANK.get(str(row.get("evidence_tier")), 99),
284
+ str(row.get("report_date", "")),
285
+ int(row.get("case_rank") or 999999),
286
+ ),
287
+ )
288
+ quality = marker.get("coordinate_quality", "")
289
+ warning = ""
290
+ if quality in COARSE_COORDINATE_QUALITIES:
291
+ warning = "\n\n**Coordinate note:** this marker is a coarse centroid. Use it as a review location, not a precise sighting coordinate."
292
+ lines = [
293
+ f"### {marker.get('plot_label', '')}",
294
+ "",
295
+ f"- Map mode: `{mode}`",
296
+ f"- Cases at marker: `{len(marker_cases)}`",
297
+ f"- Probable clusters: `{marker.get('probable_cluster_count', '')}`",
298
+ f"- Evidence mix: {marker.get('evidence_mix', '')}",
299
+ f"- Date span: `{marker.get('date_span', '')}`",
300
+ f"- Coordinate quality: `{quality}`",
301
+ f"- Top source domains: {marker.get('top_source_domains', '')}",
302
+ warning,
303
+ "",
304
+ "#### Cases behind this marker",
305
+ ]
306
+ for row in marker_cases[:18]:
307
+ lines.extend(
308
+ [
309
+ "",
310
+ f"**#{row.get('case_rank')} - {row.get('headline', '')}**",
311
+ f"- `{row.get('evidence_tier', '')}` | `{row.get('report_date', '')}` | `{row.get('site_name', '')}`",
312
+ f"- Source: [{row.get('publisher', '') or row.get('source_domain', '')}]({row.get('source_url', '')})",
313
+ f"- Boundary: {row.get('claim_boundary', '')}",
314
+ ]
315
+ )
316
+ if len(marker_cases) > 18:
317
+ lines.append(f"\n...and {len(marker_cases) - 18} more rows in the marker table/filter result.")
318
+ return "\n".join(line for line in lines if line is not None)
319
+
320
+
321
def _render(
    cases: pd.DataFrame,
    evidence_tiers,
    coordinate_qualities,
    countries,
    site_types,
    source_domains,
    years,
    mode,
    repeated_only,
    query,
):
    """Compute every UI output for the current filter and mode selection.

    Filters the cases, derives the marker rows for the active mode, and
    returns a 6-tuple: summary text, map figure, marker table, marker
    records, filtered case records, and the detail panel for the first
    marker. A falsy ``mode`` is treated as ``"Grouped sites"``.
    """
    active_mode = mode or "Grouped sites"

    filtered = _filter_cases(
        cases,
        evidence_tiers,
        coordinate_qualities,
        countries,
        site_types,
        source_domains,
        years,
        query,
    )
    markers = _marker_rows(filtered, active_mode, bool(repeated_only))

    case_records = filtered.to_dict("records")
    marker_records = markers.to_dict("records")

    # The table shows case columns in individual mode and grouped columns
    # otherwise, limited to the columns the marker frame actually has.
    if active_mode == "Individual cases":
        wanted_columns = CASE_COLUMNS
    else:
        wanted_columns = GROUP_COLUMNS
    present_columns = [name for name in wanted_columns if name in markers.columns]
    marker_table = markers[present_columns].copy()

    summary = _summary_text(filtered, markers, active_mode)
    figure = _map(markers, active_mode)
    first_detail = _detail(marker_records, case_records, 0, active_mode)
    return (summary, figure, marker_table, marker_records, case_records, first_detail)
347
 
348
 
349
  def build_app(data_dir: str | Path):
350
  data_dir = Path(data_dir)
351
  cases, manifest, quality = _load_data(data_dir)
352
+ evidence_choices = _options(cases["evidence_tier"])
353
+ coordinate_choices = _options(cases["coordinate_quality"])
354
  with gr.Blocks(title="Mystery Drone Reports Around Sensitive Sites") as app:
355
  gr.Markdown(_markdown_header(manifest, quality))
356
  with gr.Row():
357
+ mode = gr.Radio(
358
+ choices=["Grouped sites", "Individual cases", "Coarse-location review"],
359
+ value="Grouped sites",
360
+ label="Map mode",
361
  )
362
+ repeated_only = gr.Checkbox(value=False, label="Only repeated markers")
363
+ query = gr.Textbox(label="Search", placeholder="Try New Jersey, Langley, Copenhagen, airport, military base")
364
+ with gr.Row():
365
+ evidence_filter = gr.CheckboxGroup(choices=evidence_choices, value=evidence_choices, label="Evidence tier")
366
+ coordinate_filter = gr.CheckboxGroup(choices=coordinate_choices, value=coordinate_choices, label="Coordinate quality")
367
+ with gr.Row():
368
+ country_filter = gr.Dropdown(choices=_options(cases["country"]), value=[], multiselect=True, label="Country")
369
+ site_filter = gr.Dropdown(choices=_options(cases["site_type"]), value=[], multiselect=True, label="Site type")
370
+ source_filter = gr.Dropdown(choices=_options(cases["source_domain"]), value=[], multiselect=True, label="Source domain")
371
+ year_filter = gr.Dropdown(choices=_options(cases["report_year"]), value=[], multiselect=True, label="Report year")
 
 
 
372
  summary = gr.Markdown()
373
+ with gr.Row():
374
+ with gr.Column(scale=3):
375
+ map_plot = gr.Plot(label="Grouped case map")
376
+ with gr.Column(scale=2):
377
+ detail = gr.Markdown()
378
+ marker_table = gr.Dataframe(label="Visible map markers", interactive=False)
379
+ marker_rows_state = gr.State([])
380
+ filtered_rows_state = gr.State([])
381
+
382
+ def render(evidence_tiers, coordinate_qualities, countries, site_types, source_domains, years, map_mode, repeats, search_query):
383
+ return _render(cases, evidence_tiers, coordinate_qualities, countries, site_types, source_domains, years, map_mode, repeats, search_query)
384
+
385
+ inputs = [
386
+ evidence_filter,
387
+ coordinate_filter,
388
+ country_filter,
389
+ site_filter,
390
+ source_filter,
391
+ year_filter,
392
+ mode,
393
+ repeated_only,
394
+ query,
395
+ ]
396
+ outputs = [summary, map_plot, marker_table, marker_rows_state, filtered_rows_state, detail]
397
+ for control in inputs:
398
+ control.change(render, inputs=inputs, outputs=outputs)
399
+
400
+ def select_marker(markers, filtered_rows, map_mode, evt: gr.SelectData):
401
  if not evt or evt.index is None:
402
+ return _detail(markers, filtered_rows, 0, map_mode)
403
  row_index = evt.index[0] if isinstance(evt.index, (list, tuple)) else evt.index
404
+ return _detail(markers, filtered_rows, row_index, map_mode)
405
 
406
+ marker_table.select(select_marker, inputs=[marker_rows_state, filtered_rows_state, mode], outputs=detail)
407
+ app.load(render, inputs=inputs, outputs=outputs)
 
 
 
 
408
  return app
space_manifest.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "space_bundle_version": "mystery_drone_sensitive_site_space_v1",
3
  "source_release_version": "mystery-drone-sensitive-site-cases-2026-05-v1",
4
  "case_count": 149,
5
  "release_grade": true,
@@ -13,14 +13,14 @@
13
  {
14
  "artifact_role": "space_public_app",
15
  "artifact_path": "public_space_app.py",
16
- "content_sha256": "fa5bdda74630f425a0389dbb89ba43ab1bc81d9302a5a0feeb657d5146ccd172",
17
- "byte_count": 8877
18
  },
19
  {
20
  "artifact_role": "readme",
21
  "artifact_path": "README.md",
22
- "content_sha256": "3dc884fb6fe90ae55d6494c38e365e0660dc68efb2519d8e54f7c14fc19e2669",
23
- "byte_count": 626
24
  },
25
  {
26
  "artifact_role": "requirements",
@@ -47,5 +47,5 @@
47
  "byte_count": 1008
48
  }
49
  ],
50
- "bundle_hash": "89aeb842010180e667afc7a238b888daaf1480ac29ed298735b5f454c67e107b"
51
  }
 
1
  {
2
+ "space_bundle_version": "mystery_drone_sensitive_site_space_v2",
3
  "source_release_version": "mystery-drone-sensitive-site-cases-2026-05-v1",
4
  "case_count": 149,
5
  "release_grade": true,
 
13
  {
14
  "artifact_role": "space_public_app",
15
  "artifact_path": "public_space_app.py",
16
+ "content_sha256": "05ecfaa2d35d3cb16dcfba7a9bc94ea9a19d21a49fe875acc704b50f27c419f1",
17
+ "byte_count": 17863
18
  },
19
  {
20
  "artifact_role": "readme",
21
  "artifact_path": "README.md",
22
+ "content_sha256": "050e3c4fcac1cc1dd91c026f350d2e458d4641c0da2d8ec18c06009a8d7be990",
23
+ "byte_count": 605
24
  },
25
  {
26
  "artifact_role": "requirements",
 
47
  "byte_count": 1008
48
  }
49
  ],
50
+ "bundle_hash": "1efe312db89231fc27a11b2c2e540727fd9196ed7bec35115afee24d474cda6e"
51
  }