Spaces:

sajith-0701
/

IntentDrive

Running

IntentDrive / backend /scripts /tools /generate_benchmark_metric_pages.py

sajith-0701

Deploy FastAPI backend to HF Spaces (Docker SDK)

98075af 1 day ago

18.8 kB

	from __future__ import annotations

	import argparse
	from pathlib import Path
	from typing import Dict, List, Sequence, Tuple

	import matplotlib

	matplotlib.use("Agg")
	import matplotlib.pyplot as plt
	import numpy as np
	from matplotlib.patches import Rectangle


	REPO_ROOT = Path(__file__).resolve().parents[3]
	LOG_DIR = REPO_ROOT / "log"


	MODELS: List[str] = [
	"Baseline (CV)",
	"Camera-only Transformer",
	"Fusion Transformer"
	]


	PRESETS: Dict[str, Dict[str, object]] = {
	"measured": {
	"display_name": "Measured benchmark",
	"source_primary": "Source: provided benchmark values",
	"source_runtime": "Source: provided runtime benchmark",
	"optional_note": "Estimated supporting CV metric chart (replace with measured evaluation values when available)",
	"primary_metrics": {
	"minADE@3 (m)": [0.65, 0.55, 0.54],
	"minFDE@3 (m)": [1.35, 1.09, 1.07],
	"Miss Rate >2.0m (%)": [19.9, 13.0, 12.4],
	},
	"runtime_metrics": {
	"Detection latency": (76.0, "ms/frame"),
	"Transformer predict latency": (13.6, "ms"),
	"End-to-end live cycle": (89.6, "ms"),
	"End-to-end throughput": (11.6, "FPS"),
	},
	"runtime_targets": {
	"Detection latency": (60.0, True),
	"Transformer predict latency": (20.0, True),
	"End-to-end live cycle": (66.7, True),
	"End-to-end throughput": (15.0, False),
	},
	"optional_metrics": {
	"Precision (%)": ([74.0, 85.0, 88.0], 85.0),
	"Recall (%)": ([68.0, 80.0, 83.0], 80.0),
	"F1 (%)": ([71.0, 82.0, 85.0], 82.0),
	"mAP@0.5 (%)": ([62.0, 76.0, 79.0], 75.0),
	"mAP@[0.5:0.95] (%)": ([34.0, 46.0, 49.0], 45.0),
	"IoU (%)": ([52.0, 62.0, 65.0], 60.0),
	},
	},
	"best": {
	"display_name": "Best benchmark (analyzed target)",
	"source_primary": "Source: analyst-optimized trajectory target",
	"source_runtime": "Source: analyst-optimized runtime target",
	"optional_note": "Analyzed best-case CV metric chart (target values)",
	"primary_metrics": {
	"minADE@3 (m)": [0.65, 0.50, 0.42],
	"minFDE@3 (m)": [1.35, 0.95, 0.78],
	"Miss Rate >2.0m (%)": [19.9, 9.8, 7.1],
	},
	"runtime_metrics": {
	"Detection latency": (42.0, "ms/frame"),
	"Transformer predict latency": (8.5, "ms"),
	"End-to-end live cycle": (55.0, "ms"),
	"End-to-end throughput": (18.2, "FPS"),
	},
	"runtime_targets": {
	"Detection latency": (45.0, True),
	"Transformer predict latency": (10.0, True),
	"End-to-end live cycle": (60.0, True),
	"End-to-end throughput": (16.0, False),
	},
	"optional_metrics": {
	"Precision (%)": ([74.0, 89.0, 92.0], 90.0),
	"Recall (%)": ([68.0, 86.0, 90.0], 88.0),
	"F1 (%)": ([71.0, 87.0, 91.0], 89.0),
	"mAP@0.5 (%)": ([62.0, 82.0, 86.0], 85.0),
	"mAP@[0.5:0.95] (%)": ([34.0, 54.0, 60.0], 58.0),
	"IoU (%)": ([52.0, 66.0, 72.0], 70.0),
	},
	},
	}


	COLOR_BG = "#030712"
	COLOR_BAND = "#0B152A"
	COLOR_PANEL = "#09121F"
	COLOR_GRID = "#314258"
	MODEL_COLORS = ["#5E6B7E", "#69B3FF", "#7BE5A7"]
	COLOR_LINE = "#B5E6FF"
	COLOR_GOOD = "#7BE5A7"
	COLOR_WARN = "#FFC47A"
	COLOR_BAD = "#FF7F96"
	COLOR_TARGET = "#8CBFFF"


	def setup_theme() -> None:
	plt.rcParams.update(
	{
	"figure.facecolor": COLOR_BG,
	"axes.facecolor": COLOR_PANEL,
	"savefig.facecolor": COLOR_BG,
	"text.color": "#FFFFFF",
	"axes.labelcolor": "#FFFFFF",
	"xtick.color": "#FFFFFF",
	"ytick.color": "#FFFFFF",
	"axes.edgecolor": "#C5D4EA",
	"font.family": "Calibri",
	"font.size": 22,
	}
	)


	def clean_name(name: str) -> str:
	out = "".join(ch if ch.isalnum() else "_" for ch in name)
	while "__" in out:
	out = out.replace("__", "_")
	return out.strip("_").lower()


	def pct_improvement(old: float, new: float) -> float:
	if abs(old) < 1e-12:
	return 0.0
	return 100.0 * (old - new) / old


	def style_figure(fig: plt.Figure) -> None:
	fig.patch.set_facecolor(COLOR_BG)
	fig.add_artist(
	Rectangle(
	(0, 0.92),
	1,
	0.08,
	transform=fig.transFigure,
	facecolor=COLOR_BAND,
	alpha=0.95,
	linewidth=0,
	zorder=0,
	)
	)


	def style_axes(ax: plt.Axes) -> None:
	ax.set_facecolor(COLOR_PANEL)
	ax.grid(True, axis="y", linestyle="--", linewidth=0.8, color=COLOR_GRID, alpha=0.55)
	for spine in ax.spines.values():
	spine.set_linewidth(1.2)
	spine.set_color("#C5D4EA")


	def draw_model_metric_page(
	title: str,
	values: Sequence[float],
	out_path: Path,
	lower_is_better: bool = True,
	is_percent: bool = False,
	footnote: str = "Source: provided benchmark values",
	) -> None:
	fig, ax = plt.subplots(figsize=(13.333, 7.5), dpi=150)

	style_figure(fig)
	style_axes(ax)

	x = np.arange(len(MODELS))
	bars = ax.bar(x, values, color=MODEL_COLORS, edgecolor="#DCE8F6", linewidth=1.2, zorder=2)
	ax.plot(x, values, color=COLOR_LINE, linewidth=2.8, marker="o", markersize=7, zorder=3)

	value_suffix = "%" if is_percent or ("%" in title) else ""
	for bar, val in zip(bars, values):
	ax.text(
	bar.get_x() + bar.get_width() / 2,
	bar.get_height(),
	f"{val:.2f}{value_suffix}",
	ha="center",
	va="bottom",
	fontsize=18,
	color="#FFFFFF",
	zorder=4,
	)

	baseline = values[0]
	cam = values[1]
	fusion = values[2]

	if lower_is_better:
	cam_delta = pct_improvement(baseline, cam)
	fusion_delta = pct_improvement(baseline, fusion)
	subtitle = f"Improvement vs Baseline: Camera {cam_delta:.1f}% \| Fusion {fusion_delta:.1f}%"
	else:
	cam_delta = 100.0 * (cam - baseline) / max(1e-12, baseline)
	fusion_delta = 100.0 * (fusion - baseline) / max(1e-12, baseline)
	subtitle = f"Gain vs Baseline: Camera {cam_delta:.1f}% \| Fusion {fusion_delta:.1f}%"

	ax.set_title(title, fontsize=40, weight="bold", pad=18)
	ax.set_ylabel("Value", fontsize=24)
	ax.set_xticks(x)
	ax.set_xticklabels(MODELS)
	ax.tick_params(axis="x", labelrotation=0, labelsize=15)
	ax.tick_params(axis="y", labelsize=18)
	ax.margins(x=0.05)

	fig.text(0.5, 0.06, subtitle, ha="center", va="center", fontsize=22, color="#FFFFFF")
	fig.text(0.01, 0.01, footnote, ha="left", va="bottom", fontsize=12, color="#D0D0D0")
	fig.tight_layout(rect=(0.02, 0.10, 0.98, 0.95))
	fig.savefig(out_path)
	plt.close(fig)


	def draw_runtime_metric_page(
	title: str,
	value: float,
	unit: str,
	target: float,
	lower_is_better: bool,
	out_path: Path,
	footnote: str,
	) -> None:
	fig, ax = plt.subplots(figsize=(13.333, 7.5), dpi=150)

	style_figure(fig)
	style_axes(ax)

	labels = ["Measured", "Target"]
	vals = [value, target]

	if lower_is_better:
	measured_color = COLOR_GOOD if value <= target else COLOR_BAD
	status = f"Gap vs target: {value - target:+.1f} {unit}"
	hint = "Lower is better"
	else:
	measured_color = COLOR_GOOD if value >= target else COLOR_WARN
	status = f"Gap vs target: {value - target:+.1f} {unit}"
	hint = "Higher is better"

	bars = ax.barh(labels, vals, color=[measured_color, COLOR_TARGET], edgecolor="#DCE8F6", linewidth=1.2, zorder=2)

	for bar, val in zip(bars, vals):
	ax.text(
	val + max(vals) * 0.02,
	bar.get_y() + bar.get_height() / 2,
	f"{val:.1f} {unit}",
	ha="left",
	va="center",
	fontsize=20,
	color="#FFFFFF",
	)

	ax.set_xlim(0, max(vals) * 1.45)
	ax.set_title(f"{title} ({unit})", fontsize=40, weight="bold", pad=18)
	ax.set_xlabel("Value", fontsize=22)
	ax.tick_params(axis="x", labelsize=16)
	ax.tick_params(axis="y", labelsize=20)

	fig.text(0.5, 0.06, f"{hint} \| {status}", ha="center", va="center", fontsize=22, color="#FFFFFF")
	fig.text(0.01, 0.01, footnote, ha="left", va="bottom", fontsize=12, color="#D0D0D0")

	fig.tight_layout(rect=(0.03, 0.10, 0.98, 0.95))
	fig.savefig(out_path)
	plt.close(fig)


	def draw_optional_metric_page(
	metric_name: str,
	values: Sequence[float],
	target: float,
	out_path: Path,
	footnote: str,
	) -> None:
	fig, ax = plt.subplots(figsize=(13.333, 7.5), dpi=150)

	style_figure(fig)
	style_axes(ax)

	x = np.arange(len(MODELS))
	bars = ax.bar(x, values, color=MODEL_COLORS, edgecolor="#DCE8F6", linewidth=1.2, zorder=2)
	ax.plot(x, values, color=COLOR_LINE, linewidth=2.8, marker="o", markersize=7, zorder=3)
	ax.axhline(target, color=COLOR_TARGET, linestyle="--", linewidth=2.0, zorder=1)

	for bar, val in zip(bars, values):
	ax.text(
	bar.get_x() + bar.get_width() / 2,
	bar.get_height(),
	f"{val:.1f}%",
	ha="center",
	va="bottom",
	fontsize=18,
	color="#FFFFFF",
	)

	ax.text(
	x[-1] + 0.35,
	target,
	f"Target {target:.1f}%",
	ha="left",
	va="center",
	fontsize=16,
	color=COLOR_TARGET,
	)

	baseline = values[0]
	cam = values[1]
	fusion = values[2]
	cam_gain = 100.0 * (cam - baseline) / max(1e-12, baseline)
	fusion_gain = 100.0 * (fusion - baseline) / max(1e-12, baseline)

	ax.set_title(metric_name, fontsize=40, weight="bold", pad=18)
	ax.set_ylabel("Percent", fontsize=24)
	ax.set_ylim(0, max(max(values), target) * 1.25)
	ax.set_xticks(x)
	ax.set_xticklabels(MODELS)
	ax.tick_params(axis="x", labelrotation=0, labelsize=15)
	ax.tick_params(axis="y", labelsize=18)

	fig.text(
	0.5,
	0.06,
	f"Estimated gain vs Baseline: Camera {cam_gain:.1f}% \| Fusion {fusion_gain:.1f}%",
	ha="center",
	va="center",
	fontsize=20,
	color="#FFFFFF",
	)
	fig.text(
	0.01,
	0.01,
	footnote,
	ha="left",
	va="bottom",
	fontsize=11,
	color="#D0D0D0",
	)

	fig.tight_layout(rect=(0.03, 0.10, 0.98, 0.95))
	fig.savefig(out_path)
	plt.close(fig)


	def draw_latency_share_chart(
	runtime_metrics: Dict[str, Tuple[float, str]],
	out_path: Path,
	footnote: str,
	) -> None:
	det = runtime_metrics["Detection latency"][0]
	pred = runtime_metrics["Transformer predict latency"][0]
	e2e = runtime_metrics["End-to-end live cycle"][0]
	other = max(0.0, e2e - det - pred)

	values = [det, pred]
	labels = ["Detection", "Transformer"]
	colors = [COLOR_BAD, COLOR_GOOD]
	if other > 1e-6:
	values.append(other)
	labels.append("Other")
	colors.append("#7991B0")

	fig, ax = plt.subplots(figsize=(13.333, 7.5), dpi=150)
	style_figure(fig)
	ax.set_facecolor(COLOR_PANEL)

	wedges, _, _ = ax.pie(
	values,
	labels=labels,
	autopct=lambda pct: f"{pct:.1f}%",
	startangle=90,
	colors=colors,
	textprops={"color": "#FFFFFF", "fontsize": 16},
	wedgeprops={"width": 0.38, "edgecolor": COLOR_BG, "linewidth": 2.0},
	)
	for w in wedges:
	w.set_alpha(0.92)

	ax.text(0, 0.06, f"{e2e:.1f} ms", ha="center", va="center", fontsize=34, color="#FFFFFF", weight="bold")
	ax.text(0, -0.12, "total cycle", ha="center", va="center", fontsize=16, color="#FFFFFF")
	ax.set_title("End-to-end latency share", fontsize=40, weight="bold", pad=20)
	ax.axis("equal")

	fig.text(0.5, 0.06, "Detection dominates runtime cost; optimize detector stage first", ha="center", va="center", fontsize=22, color="#FFFFFF")
	fig.text(0.01, 0.01, footnote, ha="left", va="bottom", fontsize=12, color="#D0D0D0")
	fig.savefig(out_path)
	plt.close(fig)


	def write_analysis(
	out_dir: Path,
	preset_name: str,
	display_name: str,
	primary_metrics: Dict[str, List[float]],
	runtime_metrics: Dict[str, Tuple[float, str]],
	optional_note: str,
	) -> None:
	ade = primary_metrics["minADE@3 (m)"]
	fde = primary_metrics["minFDE@3 (m)"]
	miss = primary_metrics["Miss Rate >2.0m (%)"]

	det = runtime_metrics["Detection latency"][0]
	pred = runtime_metrics["Transformer predict latency"][0]
	e2e = runtime_metrics["End-to-end live cycle"][0]
	fps = runtime_metrics["End-to-end throughput"][0]

	lines: List[str] = []
	lines.append(f"Preset: {preset_name} ({display_name})")
	lines.append("")
	lines.append("Trajectory metric interpretation")
	lines.append("--------------------------------")
	lines.append(f"Baseline -> Fusion ADE improvement: {pct_improvement(ade[0], ade[2]):.2f}%")
	lines.append(f"Baseline -> Fusion FDE improvement: {pct_improvement(fde[0], fde[2]):.2f}%")
	lines.append(f"Baseline -> Fusion Miss Rate improvement: {pct_improvement(miss[0], miss[2]):.2f}%")
	lines.append("")
	lines.append(f"Camera -> Fusion ADE improvement: {pct_improvement(ade[1], ade[2]):.2f}%")
	lines.append(f"Camera -> Fusion FDE improvement: {pct_improvement(fde[1], fde[2]):.2f}%")
	lines.append(f"Camera -> Fusion Miss Rate improvement: {pct_improvement(miss[1], miss[2]):.2f}%")
	lines.append("")
	lines.append("Runtime interpretation")
	lines.append("----------------------")
	lines.append(f"Detection share of end-to-end latency: {100.0 * det / e2e:.2f}%")
	lines.append(f"Transformer share of end-to-end latency: {100.0 * pred / e2e:.2f}%")
	lines.append(f"Current cycle: {e2e:.1f} ms ({fps:.1f} FPS)")
	miss_fusion = miss[2]
	approx_one_in = 100.0 / max(1e-12, miss_fusion)
	lines.append(
	f"Miss Rate {miss_fusion:.1f}% means about 1 in {approx_one_in:.1f} trajectories still exceed 2.0m final error."
	)
	lines.append("")
	lines.append("Supporting CV metrics")
	lines.append("---------------------")
	lines.append(optional_note)

	analysis_file = out_dir / "benchmark_analysis.txt"
	analysis_file.write_text("\n".join(lines), encoding="utf-8")


	def write_manifest(
	out_dir: Path,
	preset_name: str,
	display_name: str,
	generated_files: Sequence[Path],
	) -> None:
	manifest_file = out_dir / "benchmark_manifest.txt"
	lines = [
	f"Preset: {preset_name}",
	f"Preset display: {display_name}",
	f"Output directory: {out_dir}",
	"",
	"Generated pages:",
	]
	for item in generated_files:
	lines.append(f"- {item.name}")
	manifest_file.write_text("\n".join(lines), encoding="utf-8")


	def main() -> None:
	parser = argparse.ArgumentParser(description="Generate PPT-ready benchmark metric pages from provided values.")
	parser.add_argument(
	"--preset",
	type=str,
	default="measured",
	choices=sorted(PRESETS.keys()),
	help="Metric preset to render.",
	)
	parser.add_argument(
	"--output-dir",
	type=str,
	default="",
	help="Output directory (default: log/ppt_metric_pages/trajectory_benchmark_pack_<preset>)",
	)
	args = parser.parse_args()

	setup_theme()

	preset_cfg = PRESETS[args.preset]
	display_name = str(preset_cfg["display_name"])
	source_primary = str(preset_cfg["source_primary"])
	source_runtime = str(preset_cfg["source_runtime"])
	optional_note = str(preset_cfg["optional_note"])
	primary_metrics = dict(preset_cfg["primary_metrics"])
	runtime_metrics = dict(preset_cfg["runtime_metrics"])
	runtime_targets = dict(preset_cfg["runtime_targets"])
	optional_metrics = dict(preset_cfg["optional_metrics"])

	default_out = LOG_DIR / "ppt_metric_pages" / f"trajectory_benchmark_pack_{args.preset}"
	out_dir = Path(args.output_dir) if args.output_dir else default_out
	if not out_dir.is_absolute():
	out_dir = REPO_ROOT / out_dir
	out_dir.mkdir(parents=True, exist_ok=True)

	for old_png in out_dir.glob("*.png"):
	old_png.unlink()

	generated: List[Path] = []

	primary_defs = [
	("01_minade_at3", "minADE@3 (m)", True),
	("02_minfde_at3", "minFDE@3 (m)", True),
	("03_miss_rate_gt_2m", "Miss Rate >2.0m (%)", True),
	]
	for file_stem, metric_name, lower_better in primary_defs:
	out_path = out_dir / f"{file_stem}.png"
	draw_model_metric_page(
	metric_name,
	primary_metrics[metric_name],
	out_path,
	lower_is_better=lower_better,
	is_percent=("%" in metric_name),
	footnote=f"{source_primary} \| {display_name}",
	)
	generated.append(out_path)

	runtime_defs = [
	("04_detection_latency", "Detection latency"),
	("05_transformer_predict_latency", "Transformer predict latency"),
	("06_end_to_end_cycle", "End-to-end live cycle"),
	("07_end_to_end_fps", "End-to-end throughput"),
	]
	for file_stem, metric_key in runtime_defs:
	val, unit = runtime_metrics[metric_key]
	target, lower_better = runtime_targets[metric_key]
	out_path = out_dir / f"{file_stem}.png"
	draw_runtime_metric_page(
	metric_key,
	val,
	unit,
	target,
	lower_better,
	out_path,
	footnote=f"{source_runtime} \| {display_name}",
	)
	generated.append(out_path)

	for idx, (metric_name, metric_payload) in enumerate(optional_metrics.items(), start=8):
	values, target = metric_payload
	out_path = out_dir / f"{idx:02d}_{clean_name(metric_name)}_estimated_chart.png"
	draw_optional_metric_page(
	metric_name,
	values,
	target,
	out_path,
	footnote=f"{optional_note} \| {display_name}",
	)
	generated.append(out_path)

	latency_share_path = out_dir / "14_latency_share_breakdown.png"
	draw_latency_share_chart(
	runtime_metrics,
	latency_share_path,
	footnote=f"{source_runtime} \| {display_name}",
	)
	generated.append(latency_share_path)

	write_analysis(
	out_dir,
	args.preset,
	display_name,
	primary_metrics,
	runtime_metrics,
	optional_note,
	)
	write_manifest(out_dir, args.preset, display_name, generated)

	print(f"Generated {len(generated)} benchmark pages in: {out_dir}")
	print(f"Manifest: {out_dir / 'benchmark_manifest.txt'}")
	print(f"Analysis: {out_dir / 'benchmark_analysis.txt'}")


	if __name__ == "__main__":
	main()