Spaces:

cjc0013
/

dronesightings

Running

App Files Files Community

dronesightings / public_space_app.py

cjc0013

Simplify drone Space for public readers

c518d3f verified 22 days ago

raw

history blame contribute delete

23.3 kB

	from __future__ import annotations

	import json
	from pathlib import Path

	import gradio as gr
	import pandas as pd
	import plotly.express as px


	# Countries that _region_label reports as "Europe"; any other non-US
	# country falls through to "Other / unclear".
	EUROPE_COUNTRIES = {
	"Belgium",
	"Denmark",
	"Germany",
	"Ireland",
	"Italy",
	"Netherlands",
	"Spain",
	"Sweden",
	"United Kingdom",
	}
	# Maps raw ``evidence_tier`` values to the reader-facing "clarity" wording.
	CLARITY_LABELS = {
	"resolved_sensitive_site_report": "Specific site matched",
	"named_sensitive_site_report": "Specific site named",
	"source_discovered_report": "News lead to review",
	}
	# Maps raw ``coordinate_quality`` values to the reader-facing location note.
	LOCATION_LABELS = {
	"site_centroid": "Specific site location",
	"city_area_centroid": "City-area location",
	"region_centroid": "General regional location",
	"country_centroid": "Country-level location",
	}
	# Storyline radio options; the first entry is the landing view and the
	# remaining entries are the storylines listed by _story_card_markdown.
	STORY_CHOICES = [
	"Start here: main storylines",
	"New Jersey coastal/security reports",
	"European airport disruptions",
	"Military base reports",
	"All reports by place",
	]
	# Column order of the reader-facing report table (see _public_table).
	REPORT_COLUMNS = [
	"Headline",
	"Date",
	"Place",
	"Place type",
	"Country",
	"Source",
	"Why included",
	"Caution",
	]
	# Columns of the grouped-by-place table shown next to the maps.
	PLACE_COLUMNS = [
	"Place",
	"Reports",
	"Place type",
	"Region",
	"Location note",
	"Date span",
	"Why look here",
	]
	# Raw columns exposed in the collapsed "technical fields" table.
	TECH_COLUMNS = [
	"case_id",
	"case_rank",
	"evidence_tier",
	"coordinate_quality",
	"probable_cluster_id",
	"public_row_sha256",
	]


	def _load_data(data_dir: Path) -> tuple[pd.DataFrame, dict, dict]:
	    """Load the case table and release metadata, then add reader-facing columns.

	    Reads ``mystery_drone_sensitive_site_cases.csv``, ``release_manifest.json``
	    and ``quality_report.json`` from *data_dir*.

	    Returns:
	        ``(cases, manifest, quality)`` where *cases* carries the derived
	        columns used by the rest of the app (``report_year``,
	        ``reader_clarity``, ``location_note``, ``place_type_reader``,
	        ``region_reader``, ``story_group``, ``reader_caution``,
	        ``why_included`` and ``map_group_id``).
	    """
	    cases = pd.read_csv(data_dir / "mystery_drone_sensitive_site_cases.csv").fillna("")
	    manifest = json.loads((data_dir / "release_manifest.json").read_text(encoding="utf-8"))
	    quality = json.loads((data_dir / "quality_report.json").read_text(encoding="utf-8"))
	    # Ranks that cannot be parsed get a large sentinel so they sort last.
	    cases["case_rank"] = pd.to_numeric(cases["case_rank"], errors="coerce").fillna(999999).astype(int)
	    cases["plot_lat"] = pd.to_numeric(cases["plot_lat"], errors="coerce")
	    cases["plot_lon"] = pd.to_numeric(cases["plot_lon"], errors="coerce")
	    cases["report_year"] = cases["report_date"].astype(str).str.slice(0, 4).replace("", "Older / unknown")
	    cases["reader_clarity"] = cases["evidence_tier"].map(CLARITY_LABELS).fillna("News lead to review")
	    cases["location_note"] = cases["coordinate_quality"].map(LOCATION_LABELS).fillna("General location")
	    # Order matters below: later labels read the columns derived just above.
	    cases["place_type_reader"] = cases.apply(_place_type_label, axis=1)
	    cases["region_reader"] = cases["country"].map(_region_label)
	    cases["story_group"] = cases.apply(_story_group, axis=1)
	    cases["reader_caution"] = cases.apply(_reader_caution, axis=1)
	    cases["why_included"] = cases.apply(_why_included, axis=1)
	    # Rows sharing rounded coordinates, label, place type and country are
	    # drawn as one marker. A plain "|" separator is used; the previous
	    # "\|" was an invalid escape sequence that leaked a backslash into ids.
	    cases["map_group_id"] = cases.apply(
	        lambda row: "|".join(
	            [
	                f"{float(row['plot_lat']):.4f}" if pd.notna(row["plot_lat"]) else "",
	                f"{float(row['plot_lon']):.4f}" if pd.notna(row["plot_lon"]) else "",
	                str(row.get("plot_label", "")),
	                str(row.get("place_type_reader", "")),
	                str(row.get("country", "")),
	            ]
	        ),
	        axis=1,
	    )
	    return cases, manifest, quality


	def _place_type_label(row: pd.Series) -> str:
	text = f"{row.get('site_type', '')} {row.get('site_name', '')} {row.get('plot_label', '')} {row.get('headline', '')}".lower()
	if "airport" in text or "runway" in text:
	return "Airport"
	if "coast guard" in text or "coastal" in text or "maritime" in text or "new jersey" in text:
	return "Coastal/security"
	if "military" in text or "air force" in text or "air base" in text or "arsenal" in text or "raf " in text or "joint base" in text:
	return "Military site"
	if "critical" in text or "infrastructure" in text or "nuclear" in text or "power" in text:
	return "Critical infrastructure"
	return "Other / unclear"


	def _region_label(country: str) -> str:
	if country == "United States":
	return "United States"
	if country in EUROPE_COUNTRIES:
	return "Europe"
	return "Other / unclear"


	def _story_group(row: pd.Series) -> str:
	text = f"{row.get('headline', '')} {row.get('site_name', '')} {row.get('plot_label', '')} {row.get('country', '')}".lower()
	if "new jersey" in text or "coast guard" in text:
	return "New Jersey coastal/security reports"
	if row.get("region_reader") == "Europe" and ("airport" in text or row.get("place_type_reader") == "Airport"):
	return "European airport disruptions"
	if row.get("place_type_reader") == "Military site":
	return "Military base reports"
	return "All reports by place"


	def _reader_caution(row: pd.Series) -> str:
	clarity = row.get("reader_clarity", "")
	location = row.get("location_note", "")
	if clarity == "News lead to review":
	return "Treat as a source lead, not a confirmed event."
	if location != "Specific site location":
	return "Location is approximate."
	return "Check the linked source before drawing conclusions."


	def _why_included(row: pd.Series) -> str:
	clarity = row.get("reader_clarity", "")
	place_type = row.get("place_type_reader", "")
	if clarity == "Specific site matched":
	return f"Matched to a {place_type.lower()} report location."
	if clarity == "Specific site named":
	return f"The source names a {place_type.lower()} or sensitive place."
	return f"The source language points to a drone report near a {place_type.lower()} context."


	def _date_span(values: pd.Series) -> str:
	dates = sorted(str(value) for value in values if str(value))
	if not dates:
	return "Date unclear"
	if dates[0] == dates[-1]:
	return dates[0]
	return f"{dates[0]} to {dates[-1]}"


	def _count_text(values: pd.Series, limit: int = 4) -> str:
	counts = values.astype(str).replace("", "unknown").value_counts()
	return ", ".join(f"{key}: {int(value)}" for key, value in counts.head(limit).items())


	def _header(manifest: dict) -> str:
	named_or_matched = int(manifest.get("resolved_sensitive_site_report_count", 0)) + int(
	manifest.get("named_sensitive_site_report_count", 0)
	)
	leads = int(manifest.get("source_discovered_report_count", 0))
	return f"""# Mystery Drone Reports Near Sensitive Places

	This is a public-source index of news reports near airports, military sites, coastal/security areas, and other sensitive places. It is not proof of threat, intent, or unusual origin.

	{manifest.get("case_count", 0)} public-source reports \| {named_or_matched} name or match a specific sensitive site \| {leads} broader leads for follow-up
	"""


	def _story_intro(story: str, rows: pd.DataFrame) -> str:
	if rows.empty:
	return "No reports match this storyline."
	places = _count_text(rows["plot_label"], limit=5)
	sources = _count_text(rows["source_domain"], limit=5)
	dates = _date_span(rows["report_date"])
	location_note = "Some markers are approximate because public reports often describe areas rather than exact coordinates."
	if story == "New Jersey coastal/security reports":
	lead = "This group collects public reports connected to the New Jersey drone wave and nearby coastal/security locations."
	caution = "Many rows are broad reporting leads, so treat this as a reporting trail rather than a confirmed incident list."
	elif story == "European airport disruptions":
	lead = "This group follows reports around European airport disruptions and related drone activity."
	caution = "Airport closures and disruption reports can involve repeated follow-up stories, so use the source links to separate event reports from later context."
	elif story == "Military base reports":
	lead = "This group focuses on reports that name or point toward military bases and military-site areas."
	caution = "A report near a base does not prove origin, intent, or threat."
	elif story == "All reports by place":
	lead = "This view groups the full report set by place so repeated locations are easier to scan."
	caution = "Marker size means number of source reports, not number of confirmed objects."
	else:
	lead = "Pick a storyline below to explore the main reporting trails."
	caution = "Start with the story summaries, then use the map and sources for details."
	return f"""## {story}

	{lead}

	- Reports in view: {len(rows)}
	- Date range: {dates}
	- Common places: {places}
	- Common sources: {sources}

	What this does not prove: {caution}

	Location note: {location_note}
	"""


	def _story_rows(cases: pd.DataFrame, story: str) -> pd.DataFrame:
	if story == "Start here: main storylines":
	return cases.copy()
	if story == "All reports by place":
	return cases.copy()
	return cases[cases["story_group"] == story].copy()


	def _filter_rows(cases: pd.DataFrame, search: str, region: str, place_type: str, clarity: str, year: str) -> pd.DataFrame:
	rows = cases.copy()
	if region and region != "All":
	rows = rows[rows["region_reader"] == region]
	if place_type and place_type != "All":
	rows = rows[rows["place_type_reader"] == place_type]
	if clarity and clarity != "All":
	rows = rows[rows["reader_clarity"] == clarity]
	if year and year != "All":
	if year == "Older / unknown":
	rows = rows[~rows["report_year"].isin(["2024", "2025", "2026"])]
	else:
	rows = rows[rows["report_year"] == year]
	search = str(search or "").strip().lower()
	if search:
	haystack = (
	rows["headline"].astype(str)
	+ " "
	+ rows["site_name"].astype(str)
	+ " "
	+ rows["plot_label"].astype(str)
	+ " "
	+ rows["country"].astype(str)
	+ " "
	+ rows["source_domain"].astype(str)
	).str.lower()
	rows = rows[haystack.str.contains(search, regex=False)]
	return rows.sort_values(["case_rank"]).reset_index(drop=True)


	def _group_rows(rows: pd.DataFrame) -> pd.DataFrame:
	out: list[dict] = []
	if rows.empty:
	return pd.DataFrame(columns=["Place", "Reports", "Place type", "Region", "Location note", "Date span", "Why look here", "map_group_id", "plot_lat", "plot_lon"])
	for group_id, group in rows.groupby("map_group_id", sort=False):
	out.append(
	{
	"map_group_id": group_id,
	"Place": str(group["plot_label"].iloc[0]),
	"Reports": int(len(group)),
	"Place type": str(group["place_type_reader"].iloc[0]),
	"Region": str(group["region_reader"].iloc[0]),
	"Location note": str(group["location_note"].iloc[0]),
	"Date span": _date_span(group["report_date"]),
	"Why look here": _count_text(group["reader_clarity"], limit=3),
	"plot_lat": float(group["plot_lat"].iloc[0]),
	"plot_lon": float(group["plot_lon"].iloc[0]),
	"source_summary": _count_text(group["source_domain"], limit=3),
	}
	)
	grouped = pd.DataFrame(out)
	return grouped.sort_values(["Reports", "Place"], ascending=[False, True]).reset_index(drop=True)


	def _map(groups: pd.DataFrame):
	    """Draw the grouped places on a world map; marker size is the report count."""
	    if groups.empty:
	        # Blank map with the same height so the layout does not jump.
	        blank = pd.DataFrame({"plot_lat": [], "plot_lon": []})
	        empty_fig = px.scatter_geo(blank, lat="plot_lat", lon="plot_lon", height=560)
	        empty_fig.update_layout(margin={"l": 0, "r": 0, "t": 12, "b": 0})
	        return empty_fig
	    palette = {
	        "Airport": "#1f77b4",
	        "Military site": "#b42318",
	        "Coastal/security": "#2e7d62",
	        "Critical infrastructure": "#8e5ea2",
	        "Other / unclear": "#6b7280",
	    }
	    # Coordinates are hidden in the hover box; everything else is shown.
	    hover_fields = {
	        "Reports": True,
	        "Region": True,
	        "Location note": True,
	        "Date span": True,
	        "Why look here": True,
	        "source_summary": True,
	        "plot_lat": False,
	        "plot_lon": False,
	    }
	    figure = px.scatter_geo(
	        groups,
	        lat="plot_lat",
	        lon="plot_lon",
	        color="Place type",
	        size="Reports",
	        size_max=38,
	        hover_name="Place",
	        hover_data=hover_fields,
	        projection="natural earth",
	        height=560,
	        color_discrete_map=palette,
	    )
	    marker_style = {"opacity": 0.8, "line": {"width": 0.6, "color": "white"}}
	    figure.update_traces(marker=marker_style)
	    figure.update_geos(showland=True, landcolor="#eef2f5", showocean=True, oceancolor="#dfeaf2", showcountries=True)
	    figure.update_layout(margin={"l": 0, "r": 0, "t": 18, "b": 0}, legend_title_text="Place type")
	    return figure


	def _public_table(rows: pd.DataFrame) -> pd.DataFrame:
	if rows.empty:
	return pd.DataFrame(columns=REPORT_COLUMNS)
	return pd.DataFrame(
	{
	"Headline": rows["headline"],
	"Date": rows["report_date"].replace("", "Date unclear"),
	"Place": rows["plot_label"],
	"Place type": rows["place_type_reader"],
	"Country": rows["country"].replace("", "unknown"),
	"Source": rows["source_domain"],
	"Why included": rows["why_included"],
	"Caution": rows["reader_caution"],
	}
	)


	def _source_cards(rows: pd.DataFrame, limit: int = 10) -> str:
	if rows.empty:
	return "No reports match this view."
	lines = ["## Source links to inspect", ""]
	for _, row in rows.head(limit).iterrows():
	lines.extend(
	[
	f"### {row['headline']}",
	f"- Date: {row['report_date'] or 'Date unclear'}",
	f"- Place: {row['plot_label']} ({row['location_note']})",
	f"- Why included: {row['why_included']}",
	f"- Caution: {row['reader_caution']}",
	f"- Source: [{row['publisher'] or row['source_domain']}]({row['source_url']})",
	"",
	]
	)
	if len(rows) > limit:
	lines.append(f"...and {len(rows) - limit} more reports in the list.")
	return "\n".join(lines)


	def _story_card_markdown(cases: pd.DataFrame) -> str:
	    """List every storyline after the landing entry with its report count."""
	    subtitles = {
	        "All reports by place": "Scan every mapped report grouped by place.",
	        "New Jersey coastal/security reports": "The largest reporting trail in this release.",
	        "European airport disruptions": "Airport closures and disruption reports across Europe.",
	    }
	    # Any storyline without its own subtitle gets the military-base wording.
	    default_subtitle = "Reports around bases and military-site areas."
	    cards = [
	        f"{story} - {len(_story_rows(cases, story))} reports. {subtitles.get(story, default_subtitle)}"
	        for story in STORY_CHOICES[1:]
	    ]
	    return "## Pick a storyline to explore\n\n" + "\n\n".join(cards)


	def _render_story(cases: pd.DataFrame, story: str):
	    """Produce the five outputs of the "Start here" tab for *story*.

	    Returns ``(intro_markdown, map_figure, places_table, reports_table,
	    source_cards_markdown)``.
	    """
	    story_rows = _story_rows(cases, story)
	    places = _group_rows(story_rows)
	    if story == "Start here: main storylines":
	        # The landing view shows the overall header plus the storyline list.
	        intro = _header_from_rows(cases) + "\n\n" + _story_card_markdown(cases)
	    else:
	        intro = _story_intro(story, story_rows)
	    return (
	        intro,
	        _map(places),
	        places[PLACE_COLUMNS],
	        _public_table(story_rows),
	        _source_cards(story_rows),
	    )


	def _header_from_rows(cases: pd.DataFrame) -> str:
	specific = int((cases["reader_clarity"].isin(["Specific site matched", "Specific site named"])).sum())
	leads = int((cases["reader_clarity"] == "News lead to review").sum())
	return f"""# Mystery Drone Reports Near Sensitive Places

	This is a public-source index of news reports near airports, military sites, coastal/security areas, and other sensitive places.

	It is not proof of threat, intent, or unusual origin.

	{len(cases)} public-source reports \| {specific} name or match a specific sensitive site \| {leads} broader leads for follow-up
	"""


	def _render_map(cases: pd.DataFrame, search: str, region: str, place_type: str, clarity: str, year: str):
	    """Produce the five outputs of the "Map" tab for the current filters.

	    Returns ``(summary_markdown, map_figure, places_table, reports_table,
	    source_cards_markdown)``.
	    """
	    matching = _filter_rows(cases, search, region, place_type, clarity, year)
	    places = _group_rows(matching)
	    counts = f"Showing {len(matching)} reports at {len(places)} places. "
	    legend = "Bigger markers mean more reports at that place. Colors show the kind of place."
	    summary = counts + legend
	    return (
	        summary,
	        _map(places),
	        places[PLACE_COLUMNS],
	        _public_table(matching),
	        _source_cards(matching),
	    )


	def _render_reports(cases: pd.DataFrame, search: str, region: str, place_type: str, clarity: str, year: str):
	    """Produce the four outputs of the "Reports" tab for the current filters.

	    Returns ``(summary_markdown, reports_table, source_cards_markdown,
	    technical_table)``.
	    """
	    matching = _filter_rows(cases, search, region, place_type, clarity, year)
	    summary = f"Showing {len(matching)} reports. Select a row by using the source links in the detail panel below."
	    readable = _public_table(matching)
	    cards = _source_cards(matching)
	    technical = _technical_table(matching)
	    return summary, readable, cards, technical


	def _technical_table(rows: pd.DataFrame) -> pd.DataFrame:
	    """Return a copy of the raw technical columns for the given rows."""
	    if not rows.empty:
	        return rows[TECH_COLUMNS].copy()
	    # Keep the column headers visible even when nothing matches.
	    return pd.DataFrame(columns=TECH_COLUMNS)


	def _data_notes(manifest: dict, quality: dict) -> str:
	return f"""# Data notes

	This Space keeps the technical classifications available, but keeps them out of the first screen.

	- Release version: {manifest.get('release_version')}
	- Public rows: {manifest.get('case_count')}
	- Quality gate passed: {quality.get('release_grade')}
	- Duplicate source URLs: {quality.get('duplicate_source_url_count')}
	- Missing source URLs: {quality.get('missing_source_url_count')}
	- Mappable rows: {quality.get('mappable_case_count')}

	Plain-language translations:

	- Specific site matched = stricter source/site matching found a sensitive-site report.
	- Specific site named = the source names a sensitive site, but it still needs review.
	- News lead to review = public source language suggests a relevant report, but this is a lead, not a confirmed event.
	- Specific site location = marker uses a known site point.
	- General regional location or country-level location = marker is approximate.
	"""


	def build_app(data_dir: str | Path):
	    """Build the Gradio app with its four tabs from the files in *data_dir*.

	    The data is loaded once with ``_load_data`` and captured by the event
	    callbacks. Tabs: "Start here" (storylines), "Map" and "Reports" (both
	    with the same five filters) and "Data notes".

	    The annotation uses a plain ``str | Path`` union; the previous
	    ``str \\| Path`` was not valid Python.

	    NOTE(review): the container nesting below (which widgets sit inside
	    each ``gr.Row``) was reconstructed from a copy whose indentation was
	    flattened - confirm against the intended layout.

	    Returns:
	        The ``gr.Blocks`` application (not launched).
	    """
	    data_dir = Path(data_dir)
	    cases, manifest, quality = _load_data(data_dir)
	    region_choices = ["All", "United States", "Europe", "Other / unclear"]
	    place_choices = ["All", "Airport", "Military site", "Coastal/security", "Critical infrastructure", "Other / unclear"]
	    clarity_choices = ["All", "Specific site matched", "Specific site named", "News lead to review"]
	    year_choices = ["All", "2026", "2025", "2024", "Older / unknown"]

	    with gr.Blocks(title="Mystery Drone Reports Near Sensitive Places") as app:
	        with gr.Tab("Start here"):
	            story = gr.Radio(choices=STORY_CHOICES, value=STORY_CHOICES[0], label="Pick a storyline")
	            story_intro = gr.Markdown()
	            with gr.Row():
	                story_map = gr.Plot(label="Story map")
	                story_sources = gr.Markdown()
	            story_places = gr.Dataframe(label="Places in this story", interactive=False)
	            story_reports = gr.Dataframe(label="Reports in this story", interactive=False)
	            story_outputs = [story_intro, story_map, story_places, story_reports, story_sources]
	            story.change(
	                lambda selected: _render_story(cases, selected),
	                inputs=story,
	                outputs=story_outputs,
	            )
	            # Populate the tab once on page load with the landing storyline.
	            app.load(
	                lambda: _render_story(cases, STORY_CHOICES[0]),
	                outputs=story_outputs,
	            )

	        with gr.Tab("Map"):
	            gr.Markdown("## Map\n\nBigger markers mean more public-source reports at that place. Colors show the kind of place.")
	            with gr.Row():
	                map_search = gr.Textbox(label="Search", placeholder="Search a place, country, source, or headline")
	                map_region = gr.Dropdown(choices=region_choices, value="All", label="Region")
	                map_place = gr.Dropdown(choices=place_choices, value="All", label="Place type")
	                map_clarity = gr.Dropdown(choices=clarity_choices, value="All", label="Report clarity")
	                map_year = gr.Dropdown(choices=year_choices, value="All", label="Time")
	            map_summary = gr.Markdown()
	            map_plot = gr.Plot(label="Report map")
	            map_places = gr.Dataframe(label="Places shown on the map", interactive=False)
	            map_reports = gr.Dataframe(label="Reports shown by current filters", interactive=False)
	            map_sources = gr.Markdown()
	            map_inputs = [map_search, map_region, map_place, map_clarity, map_year]
	            map_outputs = [map_summary, map_plot, map_places, map_reports, map_sources]
	            # Changing any one filter re-renders the tab from all five values.
	            for control in map_inputs:
	                control.change(
	                    lambda search, region, place, clarity, year: _render_map(cases, search, region, place, clarity, year),
	                    inputs=map_inputs,
	                    outputs=map_outputs,
	                )
	            app.load(
	                lambda: _render_map(cases, "", "All", "All", "All", "All"),
	                outputs=map_outputs,
	            )

	        with gr.Tab("Reports"):
	            gr.Markdown("## All reports\n\nUse this when you want source links and row-level cautions.")
	            with gr.Row():
	                report_search = gr.Textbox(label="Search", placeholder="Search a place, country, source, or headline")
	                report_region = gr.Dropdown(choices=region_choices, value="All", label="Region")
	                report_place = gr.Dropdown(choices=place_choices, value="All", label="Place type")
	                report_clarity = gr.Dropdown(choices=clarity_choices, value="All", label="Report clarity")
	                report_year = gr.Dropdown(choices=year_choices, value="All", label="Time")
	            report_summary = gr.Markdown()
	            report_table = gr.Dataframe(label="Readable report list", interactive=False)
	            report_sources = gr.Markdown()
	            with gr.Accordion("Show technical fields", open=False):
	                technical_table = gr.Dataframe(label="Technical row fields", interactive=False)
	            report_inputs = [report_search, report_region, report_place, report_clarity, report_year]
	            report_outputs = [report_summary, report_table, report_sources, technical_table]
	            for control in report_inputs:
	                control.change(
	                    lambda search, region, place, clarity, year: _render_reports(cases, search, region, place, clarity, year),
	                    inputs=report_inputs,
	                    outputs=report_outputs,
	                )
	            app.load(
	                lambda: _render_reports(cases, "", "All", "All", "All", "All"),
	                outputs=report_outputs,
	            )

	        with gr.Tab("Data notes"):
	            gr.Markdown(_data_notes(manifest, quality))
	            with gr.Accordion("Technical manifest", open=False):
	                gr.JSON(manifest)
	            with gr.Accordion("Quality report", open=False):
	                gr.JSON(quality)
	    return app