Spaces:

TheJackBright
/

polyguard-openenv-workbench

Sleeping

App Files Files Community

polyguard-openenv-workbench / polyguard-rl /scripts /render_diagram_images.py

TheJackBright

Deploy GitHub root master to Space

c296d62 12 days ago

raw

history blame contribute delete

36.5 kB

	#!/usr/bin/env python3
	"""Render polished PolyGuard architecture diagrams as individual PNG charts."""

	from __future__ import annotations

	import math
	import textwrap
	from dataclasses import dataclass
	from pathlib import Path
	from typing import Iterable

	from PIL import Image, ImageDraw, ImageFont


	# Directory one level above the folder that contains this script.
	ROOT = Path(__file__).resolve().parents[1]
	# Every rendered PNG is written below this directory.
	OUT_DIR = ROOT / "docs" / "assets" / "diagrams"

	# Absolute TrueType font paths passed to ImageFont.truetype by font().
	# NOTE(review): these look like macOS system font locations; confirm they
	# exist on every machine that is expected to run this script.
	FONT_REGULAR = "/System/Library/Fonts/Supplemental/Arial.ttf"
	FONT_BOLD = "/System/Library/Fonts/Supplemental/Arial Bold.ttf"

	# Base colours: canvas background, default text, secondary text,
	# default arrow colour, and the fill used for boxes and arrow labels.
	BG = "#f6f8fb"
	INK = "#172033"
	MUTED = "#64748b"
	LINE = "#718096"
	WHITE = "#ffffff"

	# Palettes are (fill, accent) pairs; the Chart helpers unpack them in that order.
	BLUE = ("#eaf3ff", "#2563eb")
	VIOLET = ("#f3edff", "#7c3aed")
	TEAL = ("#e8f8f2", "#0f766e")
	AMBER = ("#fff5df", "#b45309")
	ROSE = ("#fff1f3", "#e11d48")
	SLATE = ("#eef2f7", "#475569")
	MINT = ("#e7f7fb", "#0891b2")


	@dataclass(frozen=True)
	class Rect:
	    """Immutable rectangle described by its top-left corner and its size."""

	    x: int  # left edge
	    y: int  # top edge
	    w: int  # width
	    h: int  # height


	# Fonts already loaded, keyed by (size, bold), so each face is read from disk once.
	_FONT_CACHE: dict[tuple[int, bool], ImageFont.FreeTypeFont] = {}

	# Font file names tried when the configured absolute path cannot be opened.
	_FALLBACK_FONTS: dict[bool, tuple[str, ...]] = {
	    False: ("Arial.ttf", "DejaVuSans.ttf"),
	    True: ("Arial Bold.ttf", "DejaVuSans-Bold.ttf"),
	}


	def font(size: int, bold: bool = False) -> ImageFont.FreeTypeFont:
	    """Return a TrueType font of the given pixel size.

	    The configured ``FONT_REGULAR`` / ``FONT_BOLD`` path is tried first. If it
	    cannot be opened, a short list of common font file names is tried so the
	    script is not tied to one machine's font layout. Loaded fonts are cached
	    per ``(size, bold)``.

	    Args:
	        size: Font size passed to ``ImageFont.truetype``.
	        bold: Select the bold face instead of the regular one.

	    Returns:
	        The loaded font object.

	    Raises:
	        OSError: If none of the candidate fonts can be loaded.
	    """
	    key = (size, bool(bold))
	    cached = _FONT_CACHE.get(key)
	    if cached is not None:
	        return cached

	    primary = FONT_BOLD if bold else FONT_REGULAR
	    candidates = (primary, *_FALLBACK_FONTS[bool(bold)])
	    last_error: OSError | None = None
	    for candidate in candidates:
	        try:
	            loaded = ImageFont.truetype(candidate, size)
	        except OSError as exc:
	            # Remember the failure and move on to the next candidate.
	            last_error = exc
	            continue
	        _FONT_CACHE[key] = loaded
	        return loaded

	    tried = ", ".join(candidates)
	    raise OSError(f"no usable TrueType font found (tried: {tried})") from last_error


	def text_size(draw: ImageDraw.ImageDraw, text: str, fnt: ImageFont.FreeTypeFont) -> tuple[int, int]:
	    """Measure ``text`` in ``fnt`` and return ``(width, height)``.

	    The size comes from the bounding box reported by ``draw.textbbox`` with
	    the origin at ``(0, 0)``. Empty text measures ``(0, 0)`` without touching
	    the drawing context.
	    """
	    if text:
	        left, top, right, bottom = draw.textbbox((0, 0), text, font=fnt)
	        return right - left, bottom - top
	    return 0, 0


	def wrap_lines(draw: ImageDraw.ImageDraw, text: str, fnt: ImageFont.FreeTypeFont, max_width: int) -> list[str]:
	    """Greedily wrap ``text`` so each line measures at most ``max_width``.

	    Explicit newlines in ``text`` are kept as paragraph breaks, and a
	    blank paragraph yields an empty line. Words are packed onto a line while
	    the measured width fits. A single word that is too wide on its own is cut
	    into character chunks with ``textwrap.wrap``, whether it starts a line or
	    follows other words.

	    Args:
	        draw: Drawing context used for measuring.
	        text: Text to wrap; may contain ``"\\n"``.
	        fnt: Font used for measuring; its ``size`` attribute drives the
	            character-count estimate for over-wide words.
	        max_width: Maximum measured line width.

	    Returns:
	        The wrapped lines in order.
	    """

	    def fits(candidate: str) -> bool:
	        return text_size(draw, candidate, fnt)[0] <= max_width

	    # Rough characters-per-line estimate: treat every glyph as ``fnt.size`` wide.
	    chunk_width = max(8, max_width // max(1, fnt.size))

	    lines: list[str] = []
	    for part in text.split("\n"):
	        if not part.strip():
	            lines.append("")
	            continue
	        current = ""
	        for word in part.split():
	            candidate = word if not current else f"{current} {word}"
	            if fits(candidate):
	                current = candidate
	                continue
	            # The word does not fit on the current line: flush what we have.
	            if current:
	                lines.append(current)
	                current = ""
	            if fits(word):
	                current = word
	                continue
	            # The word alone is too wide: split it and keep the tail open so
	            # following words can still join it.
	            chunks = textwrap.wrap(word, width=chunk_width)
	            lines.extend(chunks[:-1])
	            current = chunks[-1] if chunks else word
	        if current:
	            lines.append(current)
	    return lines


	def draw_centered_lines(
	    draw: ImageDraw.ImageDraw,
	    lines: Iterable[str],
	    fnt: ImageFont.FreeTypeFont,
	    x: int,
	    y: int,
	    w: int,
	    fill: str = INK,
	    line_gap: int = 7,
	) -> int:
	    """Draw ``lines`` top to bottom, each centred in the span ``[x, x + w]``.

	    Drawing starts at ``y``. After each line the cursor advances by that
	    line's measured height plus ``line_gap``.

	    Returns:
	        The vertical cursor position after the last line.
	    """
	    cursor = y
	    for text in lines:
	        text_w, text_h = text_size(draw, text, fnt)
	        left = x + (w - text_w) / 2
	        draw.text((left, cursor), text, font=fnt, fill=fill)
	        cursor += text_h + line_gap
	    return cursor


	def rounded(
	    draw: ImageDraw.ImageDraw,
	    rect: Rect,
	    fill: str,
	    outline: str = "#d5deea",
	    width: int = 2,
	    radius: int = 22,
	    shadow: bool = True,
	) -> None:
	    """Draw a rounded rectangle for ``rect``, optionally over a drop shadow.

	    The shadow is a flat rounded rectangle offset by (8, 10) and painted
	    first so the main shape covers it.
	    """
	    left, top = rect.x, rect.y
	    right, bottom = left + rect.w, top + rect.h
	    if shadow:
	        dx, dy = 8, 10
	        draw.rounded_rectangle((left + dx, top + dy, right + dx, bottom + dy), radius=radius, fill="#dfe6ef")
	    draw.rounded_rectangle((left, top, right, bottom), radius=radius, fill=fill, outline=outline, width=width)


	def anchor(rect: Rect, side: str) -> tuple[int, int]:
	    """Return the attachment point of ``rect`` for the given ``side``.

	    ``"top"``, ``"bottom"``, ``"left"`` and ``"right"`` give the midpoint of
	    that edge. Any other value gives the centre of the rectangle.
	    """
	    mid_x = rect.x + rect.w // 2
	    mid_y = rect.y + rect.h // 2
	    edge_points = {
	        "top": (mid_x, rect.y),
	        "bottom": (mid_x, rect.y + rect.h),
	        "left": (rect.x, mid_y),
	        "right": (rect.x + rect.w, mid_y),
	    }
	    return edge_points.get(side, (mid_x, mid_y))


	def arrow(
	    draw: ImageDraw.ImageDraw,
	    points: list[tuple[int, int]],
	    color: str = LINE,
	    width: int = 4,
	    label: str | None = None,
	    label_offset: tuple[int, int] = (0, -26),
	) -> None:
	    """Draw a polyline through ``points`` with an arrowhead on the last point.

	    Args:
	        draw: Drawing context to paint on.
	        points: Polyline vertices; the arrowhead follows the direction of the
	            final segment. With fewer than two points only the line call is
	            made.
	        color: Colour of the line, the arrowhead and the label text.
	        width: Line width.
	        label: Optional text shown in a white rounded badge.
	        label_offset: ``(dx, dy)`` added to the midpoint of the final
	            segment to position the badge.
	    """
	    draw.line(points, fill=color, width=width, joint="curve")
	    if len(points) < 2:
	        return
	    x1, y1 = points[-2]
	    x2, y2 = points[-1]

	    # Arrowhead: a triangle whose two back corners sit ``size`` away from the
	    # tip, rotated pi/7 to either side of the segment direction.
	    angle = math.atan2(y2 - y1, x2 - x1)
	    size = 17
	    left = (x2 - size * math.cos(angle - math.pi / 7), y2 - size * math.sin(angle - math.pi / 7))
	    right = (x2 - size * math.cos(angle + math.pi / 7), y2 - size * math.sin(angle + math.pi / 7))
	    draw.polygon([(x2, y2), left, right], fill=color)

	    if label:
	        # Badge is horizontally centred on the offset midpoint; ``my`` is its text top.
	        mx = (x1 + x2) // 2 + label_offset[0]
	        my = (y1 + y2) // 2 + label_offset[1]
	        fnt = font(22, bold=True)
	        tw, th = text_size(draw, label, fnt)
	        pad_x, pad_y = 12, 6
	        draw.rounded_rectangle(
	            (mx - tw / 2 - pad_x, my - pad_y, mx + tw / 2 + pad_x, my + th + pad_y),
	            radius=12,
	            fill=WHITE,
	            outline="#dbe3ee",
	        )
	        draw.text((mx - tw / 2, my), label, font=fnt, fill=color)


	class Chart:
	    """One diagram canvas plus the drawing helpers shared by all diagrams.

	    Creating a chart allocates an RGB image filled with ``BG`` and paints
	    the header immediately. The ``image`` and ``draw`` attributes are public
	    so diagram functions can paint directly.
	    """

	    def __init__(self, width: int, height: int, title: str, subtitle: str = "") -> None:
	        self.width = width
	        self.height = height
	        self.image = Image.new("RGB", (width, height), BG)
	        self.draw = ImageDraw.Draw(self.image)
	        self.title(title, subtitle)

	    def title(self, title: str, subtitle: str = "") -> None:
	        """Paint the top accent bar, the title and, if given, the subtitle."""
	        self.draw.rectangle((0, 0, self.width, 14), fill="#1d4ed8")
	        self.draw.text((76, 54), title, font=font(58, bold=True), fill=INK)
	        if subtitle:
	            self.draw.text((78, 126), subtitle, font=font(26), fill=MUTED)

	    def group(self, rect: Rect, title: str, palette: tuple[str, str]) -> None:
	        """Paint a titled container: palette fill with an accent header band."""
	        fill, accent = palette
	        rounded(self.draw, rect, fill=fill, outline="#cbd5e1", width=2, radius=30, shadow=False)
	        self.draw.rounded_rectangle(
	            (rect.x, rect.y, rect.x + rect.w, rect.y + 64),
	            radius=30,
	            fill=accent,
	        )
	        # Square off the lower part of the header band so only its top corners stay rounded.
	        self.draw.rectangle((rect.x, rect.y + 34, rect.x + rect.w, rect.y + 64), fill=accent)
	        self.draw.text((rect.x + 26, rect.y + 18), title, font=font(26, bold=True), fill=WHITE)

	    def box(
	        self,
	        rect: Rect,
	        title: str,
	        body: str = "",
	        palette: tuple[str, str] = SLATE,
	        title_size: int = 25,
	        body_size: int = 20,
	        center: bool = True,
	    ) -> Rect:
	        """Paint a white card with an accent stripe, a bold title and optional body.

	        Args:
	            rect: Card geometry.
	            title: Bold heading, wrapped to the card width.
	            body: Optional secondary text, wrapped to the card width.
	            palette: ``(fill, accent)`` pair; only the accent is used here.
	            title_size: Font size of the heading.
	            body_size: Font size of the body text.
	            center: Centre the text block in the card when true; otherwise
	                left-align it starting near the top.

	        Returns:
	            ``rect`` unchanged, so callers can keep it for arrow anchoring.
	        """
	        _, accent = palette
	        rounded(self.draw, rect, fill=WHITE, outline="#cbd5e1", width=2, radius=20, shadow=True)
	        self.draw.rounded_rectangle((rect.x, rect.y, rect.x + 10, rect.y + rect.h), radius=20, fill=accent)
	        title_font = font(title_size, bold=True)
	        body_font = font(body_size)
	        max_width = rect.w - 46
	        title_lines = wrap_lines(self.draw, title, title_font, max_width)
	        body_lines = wrap_lines(self.draw, body, body_font, max_width) if body else []
	        title_height = sum(text_size(self.draw, line, title_font)[1] for line in title_lines) + max(0, len(title_lines) - 1) * 7
	        body_height = sum(text_size(self.draw, line, body_font)[1] for line in body_lines) + max(0, len(body_lines) - 1) * 6
	        gap = 10 if body_lines else 0
	        total = title_height + body_height + gap
	        yy = rect.y + max(18, (rect.h - total) // 2) if center else rect.y + 20
	        if center:
	            yy = draw_centered_lines(self.draw, title_lines, title_font, rect.x + 22, yy, max_width, INK)
	            if body_lines:
	                yy += gap
	                draw_centered_lines(self.draw, body_lines, body_font, rect.x + 22, yy, max_width, MUTED, line_gap=6)
	        else:
	            self.draw.multiline_text((rect.x + 28, yy), "\n".join(title_lines), font=title_font, fill=INK, spacing=7)
	            yy += title_height + gap
	            if body_lines:
	                self.draw.multiline_text((rect.x + 28, yy), "\n".join(body_lines), font=body_font, fill=MUTED, spacing=6)
	        return rect

	    def pill(self, rect: Rect, text: str, palette: tuple[str, str], size: int = 21) -> Rect:
	        """Paint a capsule-shaped label with centred bold text and return ``rect``."""
	        fill, accent = palette
	        self.draw.rounded_rectangle(
	            (rect.x, rect.y, rect.x + rect.w, rect.y + rect.h),
	            radius=rect.h // 2,
	            fill=fill,
	            outline=accent,
	            width=2,
	        )
	        pill_font = font(size, bold=True)
	        lines = wrap_lines(self.draw, text, pill_font, rect.w - 28)
	        # Height is estimated from the font size rather than measured.
	        total_h = len(lines) * (size + 6)
	        draw_centered_lines(self.draw, lines, pill_font, rect.x + 14, rect.y + (rect.h - total_h) // 2, rect.w - 28, INK, line_gap=4)
	        return rect

	    def save(self, name: str) -> Path:
	        """Write the canvas to ``OUT_DIR / f"{name}.png"`` and return that path."""
	        OUT_DIR.mkdir(parents=True, exist_ok=True)
	        path = OUT_DIR / f"{name}.png"
	        # PNG is lossless; ``quality`` is not a PNG writer option, so it is not passed.
	        self.image.save(path)
	        return path


	def system_architecture() -> Path:
	    """Render the layered system-architecture diagram and return the PNG path.

	    Five groups are painted first, then the boxes inside each group, then the
	    arrows. Later draw calls paint over earlier ones, so statement order
	    matters.
	    """
	    c = Chart(
	        2400,
	        1500,
	        "PolyGuard System Architecture",
	        "Research environment, policy stack, OpenEnv runtime, model artifacts, and evidence outputs.",
	    )
	    # Group containers, top to bottom; the last two share the bottom row.
	    clients = Rect(110, 190, 2180, 190)
	    api = Rect(110, 440, 2180, 190)
	    agents = Rect(110, 690, 2180, 260)
	    runtime = Rect(110, 1015, 1050, 300)
	    assets = Rect(1240, 1015, 1050, 300)
	    for rect, title, pal in [
	        (clients, "User And Integration Surfaces", BLUE),
	        (api, "API And OpenEnv Surface", VIOLET),
	        (agents, "Multi-Agent Policy Stack", TEAL),
	        (runtime, "OpenEnv Runtime And Rewards", AMBER),
	        (assets, "Data, Models, And Evidence Outputs", MINT),
	    ]:
	        c.group(rect, title, pal)

	    # NOTE(review): client_boxes is not referenced again in this function.
	    client_boxes = [
	        c.box(Rect(185, 275, 390, 78), "React Patient Workbench", palette=BLUE),
	        c.box(Rect(665, 275, 350, 78), "Public HF Space", palette=BLUE),
	        c.box(Rect(1105, 275, 380, 78), "One-Run Notebook / CLI", palette=BLUE),
	        c.box(Rect(1575, 275, 365, 78), "OpenEnv Validator", palette=BLUE),
	    ]
	    api_boxes = [
	        c.box(Rect(260, 530, 330, 72), "app/api/routes.py", palette=VIOLET, title_size=21),
	        c.box(Rect(720, 530, 300, 72), "APIService", palette=VIOLET, title_size=21),
	        c.box(Rect(1180, 530, 370, 72), "PolicyProviderRouter", palette=VIOLET, title_size=21),
	        c.box(Rect(1680, 530, 350, 72), "app/env/fastapi_app.py", palette=VIOLET, title_size=20),
	    ]
	    top_agents = [
	        c.box(Rect(215 + i * 405, 780, 285, 60), name, palette=TEAL, title_size=19)
	        for i, name in enumerate(["MedRec", "Evidence", "Graph Safety", "Dosing", "Candidate"])
	    ]
	    # NOTE(review): the last box here spans x 2040..2325, past the agents
	    # group's right edge at 2290 — confirm this is intended.
	    bottom_agents = [
	        c.box(Rect(420 + i * 405, 865, 285, 60), name, palette=TEAL if name != "Contextual Bandit" else AMBER, title_size=19)
	        for i, name in enumerate(["Supervisor", "Planner", "Contextual Bandit", "Critic", "Explainer"])
	    ]
	    runtime_boxes = [
	        c.box(Rect(185, 1110, 300, 78), "PolyGuardEnv", "stateful reset / step", palette=AMBER, title_size=22),
	        c.box(Rect(545, 1110, 240, 78), "Verifier", "legality gates", palette=AMBER, title_size=21),
	        c.box(Rect(845, 1110, 240, 78), "Reward Router", "13 components", palette=AMBER, title_size=21),
	        c.box(Rect(345, 1215, 240, 66), "Transition", palette=AMBER, title_size=20),
	        c.box(Rect(645, 1215, 240, 66), "Anti-Cheat", palette=AMBER, title_size=20),
	    ]
	    asset_boxes = [
	        c.box(Rect(1305, 1100, 215, 68), "Scenarios", palette=MINT, title_size=20),
	        c.box(Rect(1560, 1100, 250, 68), "Drug Knowledge", palette=MINT, title_size=20),
	        c.box(Rect(1850, 1100, 250, 68), "Active Qwen", "adapter / merged", palette=MINT, title_size=20),
	        c.box(Rect(1305, 1215, 215, 68), "Retrieval Index", palette=MINT, title_size=20),
	        c.box(Rect(1560, 1215, 250, 68), "Evaluation Suites", palette=MINT, title_size=20),
	        c.box(Rect(1850, 1215, 250, 68), "docs/results", "charts + reports", palette=MINT, title_size=20),
	    ]

	    # Left-to-right chains inside each row.
	    for a, b in zip(api_boxes, api_boxes[1:]):
	        arrow(c.draw, [anchor(a, "right"), anchor(b, "left")], color="#7c3aed")
	    for a, b in zip(top_agents, top_agents[1:]):
	        arrow(c.draw, [anchor(a, "right"), anchor(b, "left")], color="#0f766e", width=3)
	    c.pill(Rect(990, 845, 420, 44), "routed planning and critique", TEAL, size=18)
	    for a, b in zip(bottom_agents, bottom_agents[1:]):
	        arrow(c.draw, [anchor(a, "right"), anchor(b, "left")], color="#0f766e", width=3)
	    arrow(c.draw, [anchor(runtime_boxes[0], "right"), anchor(runtime_boxes[1], "left")], color="#b45309")
	    arrow(c.draw, [anchor(runtime_boxes[1], "right"), anchor(runtime_boxes[2], "left")], color="#b45309")
	    arrow(c.draw, [anchor(runtime_boxes[0], "bottom"), anchor(runtime_boxes[3], "top")], color="#b45309")
	    arrow(c.draw, [anchor(runtime_boxes[2], "bottom"), anchor(runtime_boxes[4], "top")], color="#b45309")
	    arrow(c.draw, [anchor(asset_boxes[0], "right"), anchor(asset_boxes[1], "left")], color="#0891b2")
	    arrow(c.draw, [anchor(asset_boxes[1], "right"), anchor(asset_boxes[2], "left")], color="#0891b2")
	    arrow(c.draw, [anchor(asset_boxes[3], "right"), anchor(asset_boxes[4], "left")], color="#475569")
	    arrow(c.draw, [anchor(asset_boxes[4], "right"), anchor(asset_boxes[5], "left")], color="#475569")
	    # Labelled connectors between neighbouring groups.
	    arrow(c.draw, [(1200, 380), (1200, 440)], color="#3b82f6", label="requests")
	    arrow(c.draw, [(1200, 630), (1200, 690)], color="#7c3aed", label="orchestrates")
	    arrow(c.draw, [(760, 950), (760, 1015)], color="#0f766e", label="safe action")
	    arrow(c.draw, [(1725, 950), (1725, 1015)], color="#0891b2", label="model + evidence")
	    arrow(c.draw, [(1160, 1165), (1240, 1165)], color="#64748b", label="reports")
	    return c.save("system_architecture")


	def runtime_step_flow() -> Path:
	    """Render the sequence-style runtime step diagram and return the PNG path.

	    Each actor gets a header box and a vertical lifeline. Messages are
	    horizontal labelled arrows between lifelines at a given y.
	    """
	    c = Chart(2400, 1320, "Runtime Step Flow", "How one reset or action moves through UI, API, policy, environment, and reward scoring.")
	    # (actor name, left x of its header box, palette)
	    actors = [
	        ("User", 130, BLUE),
	        ("React Workbench", 430, BLUE),
	        ("FastAPI APIService", 760, VIOLET),
	        ("Orchestrator", 1100, TEAL),
	        ("PolyGuardEnv", 1440, AMBER),
	        ("Policy Provider", 1780, MINT),
	        ("Reward Router", 2090, ROSE),
	    ]
	    # Horizontal centre of each actor's lifeline, keyed by actor name.
	    x_positions: dict[str, int] = {}
	    for name, x, pal in actors:
	        rect = c.box(Rect(x, 210, 220, 82), name, palette=pal, title_size=22)
	        x_positions[name] = rect.x + rect.w // 2
	        c.draw.line((x_positions[name], 315, x_positions[name], 1185), fill="#d0d9e6", width=3)

	    def msg(y: int, src: str, dst: str, label: str, color: str = LINE) -> None:
	        # Draw one labelled message from the src lifeline to the dst lifeline at height y.
	        sx, dx = x_positions[src], x_positions[dst]
	        arrow(c.draw, [(sx, y), (dx, y)], color=color, width=4, label=label, label_offset=(0, -34))

	    msg(390, "User", "React Workbench", "reset / run")
	    msg(500, "React Workbench", "FastAPI APIService", "POST /env/reset")
	    msg(610, "FastAPI APIService", "PolyGuardEnv", "reset(seed, task)", "#b45309")
	    msg(720, "PolyGuardEnv", "FastAPI APIService", "observation + candidates", "#b45309")
	    msg(830, "React Workbench", "FastAPI APIService", "step_candidate or orchestrate")
	    msg(940, "FastAPI APIService", "Orchestrator", "agent path", "#0f766e")
	    msg(1050, "Orchestrator", "Policy Provider", "optional Qwen selection", "#0891b2")
	    msg(1160, "Orchestrator", "PolyGuardEnv", "final action", "#0f766e")
	    # NOTE(review): this message sits at y=1020, above the two drawn before it — confirm intended.
	    msg(1020, "PolyGuardEnv", "Reward Router", "13 components -> 4 channels", "#e11d48")
	    c.box(Rect(1290, 1160, 430, 90), "Response", "observation, reward, done, trace, info", palette=SLATE)
	    arrow(c.draw, [(1440 + 110, 1120), (1505, 1160)], color="#64748b")
	    arrow(c.draw, [(1290, 1205), (650, 1205), (650, 900)], color="#64748b", label="render updated panels", label_offset=(0, 16))
	    return c.save("runtime_step_flow")


	def data_training_pipeline() -> Path:
	    """Render the four-column data and training pipeline diagram and return the PNG path."""
	    c = Chart(2400, 1320, "Data And Training Pipeline", "From local knowledge and synthetic cases to SFT, GRPO, activation, and inference.")
	    # Column containers, left to right.
	    groups = [
	        (Rect(90, 220, 430, 880), "Sources", BLUE),
	        (Rect(610, 220, 520, 880), "DataOps", TEAL),
	        (Rect(1220, 220, 520, 880), "Post-Training", VIOLET),
	        (Rect(1830, 220, 480, 880), "Validation And Use", AMBER),
	    ]
	    for rect, title, pal in groups:
	        c.group(rect, title, pal)
	    sources = [
	        c.box(Rect(150, 325, 310, 76), "Local drug knowledge", palette=BLUE, title_size=21),
	        c.box(Rect(150, 435, 310, 76), "Synthetic patients", palette=BLUE, title_size=21),
	        c.box(Rect(150, 545, 310, 76), "Scenario files", "easy / medium / hard", palette=BLUE, title_size=21),
	        c.box(Rect(150, 655, 310, 76), "Optional HF data", palette=BLUE, title_size=21),
	        c.box(Rect(150, 765, 310, 76), "DDI API", "optional", palette=BLUE, title_size=21),
	        c.box(Rect(150, 875, 310, 76), "Web fallback", "optional", palette=BLUE, title_size=21),
	    ]
	    dataops = [
	        c.box(Rect(700, 330, 340, 78), "Normalize drugs", palette=TEAL, title_size=22),
	        c.box(Rect(700, 465, 340, 78), "Build knowledge graph", palette=TEAL, title_size=22),
	        c.box(Rect(700, 600, 340, 78), "Build retrieval index", palette=TEAL, title_size=22),
	        c.box(Rect(700, 735, 340, 78), "Build scenarios", palette=TEAL, title_size=22),
	        c.box(Rect(700, 870, 340, 90), "Build SFT / GRPO corpus", palette=TEAL, title_size=22),
	    ]
	    training = [
	        c.box(Rect(1310, 345, 340, 86), "TRL SFT adapter", palette=VIOLET, title_size=22),
	        c.box(Rect(1310, 505, 340, 86), "TRL GRPO", "environment reward", palette=VIOLET, title_size=22),
	        c.box(Rect(1310, 665, 340, 86), "Merge / export adapters", palette=VIOLET, title_size=22),
	        c.box(Rect(1310, 825, 340, 86), "Registry + manifests", palette=VIOLET, title_size=22),
	    ]
	    validation = [
	        c.box(Rect(1905, 345, 310, 86), "Post-save inference", palette=AMBER, title_size=22),
	        c.box(Rect(1905, 505, 310, 86), "Activate model", palette=AMBER, title_size=22),
	        c.box(Rect(1905, 665, 310, 86), "/policy/model_status", palette=AMBER, title_size=21),
	        c.box(Rect(1905, 825, 310, 86), "/policy/infer", palette=AMBER, title_size=21),
	    ]
	    # Every source box points at the last DataOps box.
	    for src in sources:
	        arrow(c.draw, [anchor(src, "right"), anchor(dataops[-1], "left")], color="#3b82f6", width=3)
	    # Top-to-bottom chains inside each column, joined by labelled hand-offs.
	    for a, b in zip(dataops, dataops[1:]):
	        arrow(c.draw, [anchor(a, "bottom"), anchor(b, "top")], color="#0f766e")
	    arrow(c.draw, [anchor(dataops[-1], "right"), anchor(training[0], "left")], color="#7c3aed", label="corpus")
	    for a, b in zip(training, training[1:]):
	        arrow(c.draw, [anchor(a, "bottom"), anchor(b, "top")], color="#7c3aed")
	    arrow(c.draw, [anchor(training[-1], "right"), anchor(validation[0], "left")], color="#b45309", label="artifact")
	    for a, b in zip(validation, validation[1:]):
	        arrow(c.draw, [anchor(a, "bottom"), anchor(b, "top")], color="#b45309")
	    return c.save("data_training_pipeline")


	def multi_agent_orchestration() -> Path:
	    """Render the multi-agent orchestration diagram and return the PNG path.

	    Three groups are painted (candidate construction, policy control, step
	    closure), followed by their boxes and then the arrows. Later draw calls
	    paint over earlier ones, so statement order matters.
	    """
	    c = Chart(2400, 1250, "Multi-Agent Orchestration", "Specialized agents build a verified candidate, route it through policy control, then close the loop with reward feedback.")
	    input_group = Rect(90, 250, 560, 850)
	    decision_group = Rect(760, 250, 780, 850)
	    closure_group = Rect(1650, 250, 660, 850)
	    for rect, title, pal in [
	        (input_group, "Candidate Construction", BLUE),
	        (decision_group, "Policy Control", VIOLET),
	        (closure_group, "Step Closure", AMBER),
	    ]:
	        c.group(rect, title, pal)

	    # Left column: a top-to-bottom chain ending in the candidate box.
	    inputs = [
	        c.box(Rect(210, 360, 320, 72), "State", palette=BLUE, title_size=22),
	        c.box(Rect(210, 465, 320, 72), "MedRec", palette=TEAL, title_size=22),
	        c.box(Rect(210, 570, 320, 72), "Evidence", palette=TEAL, title_size=22),
	        c.box(Rect(210, 675, 320, 72), "Graph Safety", palette=TEAL, title_size=22),
	        c.box(Rect(210, 780, 320, 72), "Dosing", palette=TEAL, title_size=22),
	        c.box(Rect(210, 885, 320, 82), "Candidate", "legal action set", palette=TEAL, title_size=22),
	    ]
	    for a, b in zip(inputs, inputs[1:]):
	        arrow(c.draw, [anchor(a, "bottom"), anchor(b, "top")], color="#0f766e", width=3)

	    # Middle column: policy control boxes.
	    supervisor = c.box(Rect(1020, 360, 260, 82), "Supervisor", "routes context", palette=TEAL, title_size=22)
	    bandit = c.box(Rect(850, 525, 250, 82), "Bandit Top-K", "policy shortlist", palette=AMBER, title_size=22)
	    planner = c.box(Rect(1190, 525, 250, 82), "Planner", "drafts action", palette=VIOLET, title_size=22)
	    critic = c.box(Rect(1190, 700, 250, 82), "Critic", "checks action", palette=ROSE, title_size=22)
	    replan = c.box(Rect(850, 700, 250, 82), "Review / Replan", "on veto", palette=ROSE, title_size=22)
	    # Plain "|" separators: a backslash-escaped pipe would render the backslash.
	    c.pill(Rect(850, 915, 590, 74), "coordination: supervisor routing | veto loop | lightweight debate", SLATE, size=21)

	    # Right column: step closure boxes.
	    env_step = c.box(Rect(1855, 370, 250, 84), "Env Step", "apply transition", palette=AMBER, title_size=22)
	    explainer = c.box(Rect(1855, 540, 250, 84), "Explainer", "grounded rationale", palette=TEAL, title_size=22)
	    reward = c.box(Rect(1855, 710, 250, 84), "Reward + Trace", "step feedback", palette=SLATE, title_size=22)
	    update = c.box(Rect(1855, 880, 250, 84), "Bandit Update", "learn from reward", palette=AMBER, title_size=22)

	    arrow(c.draw, [anchor(inputs[-1], "right"), (705, 926), (705, 401), anchor(supervisor, "left")], color="#2563eb", label="candidate")
	    arrow(c.draw, [anchor(supervisor, "bottom"), (1150, 485), anchor(bandit, "top")], color="#b45309")
	    arrow(c.draw, [anchor(supervisor, "bottom"), (1150, 485), anchor(planner, "top")], color="#7c3aed")
	    arrow(c.draw, [anchor(bandit, "right"), anchor(planner, "left")], color="#b45309")
	    arrow(c.draw, [anchor(planner, "bottom"), anchor(critic, "top")], color="#7c3aed")
	    arrow(c.draw, [anchor(critic, "left"), anchor(replan, "right")], color="#e11d48", label="veto")
	    arrow(c.draw, [anchor(replan, "top"), (975, 650), (1315, 650), anchor(planner, "bottom")], color="#e11d48")
	    arrow(c.draw, [anchor(critic, "right"), anchor(env_step, "left")], color="#0f766e", label="approved")
	    arrow(c.draw, [anchor(env_step, "bottom"), anchor(explainer, "top")], color="#0f766e")
	    arrow(c.draw, [anchor(explainer, "bottom"), anchor(reward, "top")], color="#64748b")
	    arrow(c.draw, [anchor(reward, "bottom"), anchor(update, "top")], color="#b45309")
	    # Feedback path from the bandit update back to the bandit box, routed under the middle group.
	    arrow(c.draw, [anchor(update, "left"), (1585, 922), (1585, 1055), (800, 1055), (800, 566), anchor(bandit, "left")], color="#b45309", label="reward learning", label_offset=(-170, 10))
	    return c.save("multi_agent_orchestration")


	def reward_decomposition() -> Path:
	    """Render the reward decomposition diagram and return the PNG path.

	    The action and check boxes sit at the top, four channel groups with
	    component pills in the middle, and the total reward box at the bottom.
	    """
	    c = Chart(2500, 1420, "Reward Decomposition", "Verifier-backed rewards remain inspectable through component columns and judge-friendly primary channels.")
	    action = c.box(Rect(930, 210, 640, 92), "Candidate action", "selected legal candidate or fallback", palette=BLUE)
	    checks = c.box(Rect(800, 360, 900, 94), "Verifier + Transition + Anti-Cheat + Uncertainty", palette=VIOLET, title_size=25)
	    arrow(c.draw, [anchor(action, "bottom"), anchor(checks, "top")], color="#7c3aed")
	    # (group rect, channel title, subtitle, component names, palette)
	    channel_specs = [
	        (
	            Rect(140, 575, 500, 455),
	            "safety_legality",
	            "legal and safe action choice",
	            ["format compliance", "candidate alignment", "legality", "safety delta"],
	            ROSE,
	        ),
	        (
	            Rect(730, 575, 500, 455),
	            "clinical_improvement",
	            "clinical risk moves in the right direction",
	            ["burden improvement", "disease stability"],
	            TEAL,
	        ),
	        (
	            Rect(1320, 575, 500, 455),
	            "dosing_quality",
	            "dose-sensitive decisions are handled",
	            ["dosing quality"],
	            AMBER,
	        ),
	        (
	            Rect(1910, 575, 500, 455),
	            "process_integrity",
	            "process, uncertainty, and anti-cheat safeguards",
	            ["abstention quality", "efficiency", "process fidelity", "explanation grounding", "anti-cheat", "uncertainty calibration"],
	            VIOLET,
	        ),
	    ]
	    channels: list[Rect] = []
	    for rect, title, subtitle, components, pal in channel_specs:
	        c.group(rect, title, pal)
	        subtitle_font = font(21)
	        lines = wrap_lines(c.draw, subtitle, subtitle_font, rect.w - 64)
	        c.draw.multiline_text((rect.x + 32, rect.y + 86), "\n".join(lines), font=subtitle_font, fill=MUTED, spacing=5)
	        # Groups with more than four components use smaller, tighter pills.
	        compact = len(components) > 4
	        y = rect.y + (148 if compact else 155)
	        pill_h = 42 if compact else 54
	        step = 50 if compact else 66
	        for item in components:
	            c.pill(Rect(rect.x + 44, y, rect.w - 88, pill_h), item, pal, size=17 if compact else 19)
	            y += step
	        channels.append(rect)
	        # Elbow connector from the checks box down to the top of this channel.
	        arrow(c.draw, [anchor(checks, "bottom"), (rect.x + rect.w // 2, 520), anchor(rect, "top")], color=pal[1], width=3)
	    total = c.box(Rect(930, 1230, 640, 102), "total_reward", "clamped to 0.001 - 0.999", palette=BLUE, title_size=28)
	    for ch in channels:
	        arrow(c.draw, [anchor(ch, "bottom"), anchor(total, "top")], color="#2563eb", width=3)
	    return c.save("reward_decomposition")


	def episode_state_machine() -> Path:
	    """Render the episode state-machine diagram and return the PNG path."""
	    c = Chart(2250, 1120, "Episode State Machine", "Terminal reasons are explicit, making rollouts auditable and reward hacking visible.")
	    # State boxes keyed by a short name used when wiring arrows below.
	    nodes = {
	        "Start": c.box(Rect(100, 520, 190, 82), "Start", palette=BLUE),
	        "Reset": c.box(Rect(390, 520, 190, 82), "Reset", palette=BLUE),
	        "Observe": c.box(Rect(680, 520, 220, 82), "Observe", palette=TEAL),
	        "Select": c.box(Rect(1020, 500, 260, 122), "Candidate Selection", palette=TEAL),
	        "Verify": c.box(Rect(1420, 500, 240, 122), "Verification", palette=VIOLET),
	        "Transition": c.box(Rect(1810, 395, 245, 90), "Transition", palette=TEAL),
	        "Rollback": c.box(Rect(1810, 610, 245, 90), "Rollback", palette=ROSE),
	        "Reward": c.box(Rect(1450, 820, 250, 96), "Reward Scoring", palette=AMBER),
	        "Continue": c.box(Rect(980, 820, 250, 96), "Continue", palette=SLATE),
	        "Done": c.box(Rect(1950, 820, 220, 96), "Done", palette=BLUE),
	    }
	    # Linear left-to-right chain up to verification.
	    chain = ["Start", "Reset", "Observe", "Select", "Verify"]
	    for a, b in zip(chain, chain[1:]):
	        arrow(c.draw, [anchor(nodes[a], "right"), anchor(nodes[b], "left")], color="#475569")
	    # Verification branches, both of which lead on to reward scoring.
	    arrow(c.draw, [anchor(nodes["Verify"], "right"), anchor(nodes["Transition"], "left")], color="#0f766e", label="legal")
	    arrow(c.draw, [anchor(nodes["Verify"], "right"), (1725, 560), anchor(nodes["Rollback"], "left")], color="#e11d48", label="blocked")
	    arrow(c.draw, [anchor(nodes["Transition"], "bottom"), (1930, 780), anchor(nodes["Reward"], "right")], color="#b45309")
	    arrow(c.draw, [anchor(nodes["Rollback"], "bottom"), (1930, 780), anchor(nodes["Reward"], "right")], color="#b45309")
	    arrow(c.draw, [anchor(nodes["Reward"], "left"), anchor(nodes["Continue"], "right")], color="#64748b", label="budget remains")
	    # Loop back from "Continue" to "Observe".
	    arrow(c.draw, [anchor(nodes["Continue"], "top"), (1105, 690), (790, 690), anchor(nodes["Observe"], "bottom")], color="#64748b")
	    # One pill per terminal reason, each with its own arrow into "Done".
	    reasons = ["safe resolution", "review escalation", "exploit detected", "timeout", "budget exhausted"]
	    for i, reason in enumerate(reasons):
	        y = 760 + i * 50
	        c.pill(Rect(1735, y, 175, 36), reason, SLATE, size=16)
	        arrow(c.draw, [(1910, y + 18), anchor(nodes["Done"], "left")], color="#2563eb", width=2)
	    return c.save("episode_state_machine")


	def deployment_topology() -> Path:
	    """Render the deployment topology diagram and return the PNG path."""
	    c = Chart(2400, 1380, "Deployment Topology", "Local services, public product Space, private training Space, and artifact exchange on Hugging Face Hub.")
	    # Group containers: local machine on the left, two Spaces stacked in the middle, hub on the right.
	    local = Rect(100, 245, 580, 830)
	    product = Rect(810, 245, 600, 350)
	    training = Rect(810, 725, 600, 350)
	    hub = Rect(1540, 245, 760, 830)
	    for rect, title, pal in [
	        (local, "Local Developer Machine", BLUE),
	        (product, "Public Product Space", TEAL),
	        (training, "Private Training Space", VIOLET),
	        (hub, "Hugging Face Hub", AMBER),
	    ]:
	        c.group(rect, title, pal)
	    # Local machine boxes.
	    repo = c.box(Rect(240, 365, 300, 86), "polyguard-rl repo", palette=BLUE, title_size=22)
	    local_runtime = c.box(Rect(165, 545, 210, 82), "Local API", ":8200", palette=VIOLET, title_size=21)
	    local_env = c.box(Rect(405, 545, 210, 82), "OpenEnv", ":8201", palette=AMBER, title_size=21)
	    vite = c.box(Rect(165, 695, 210, 82), "Vite UI", ":5173", palette=BLUE, title_size=21)
	    checks = c.box(Rect(405, 695, 210, 82), "Checks", "pytest / validate / gate", palette=SLATE, title_size=21)
	    # Product Space boxes.
	    space_bundle = c.box(Rect(955, 365, 310, 84), "Product Docker Bundle", palette=TEAL, title_size=22)
	    product_runtime = c.box(Rect(890, 500, 205, 76), "FastAPI Runtime", palette=TEAL, title_size=19)
	    product_ui = c.box(Rect(1135, 500, 205, 76), "React Workbench", palette=TEAL, title_size=19)
	    # Training Space boxes.
	    train_bundle = c.box(Rect(955, 845, 310, 84), "Training Docker Space", palette=VIOLET, title_size=22)
	    runner = c.box(Rect(890, 980, 205, 76), "Gradio Runner", palette=VIOLET, title_size=19)
	    gpu = c.box(Rect(1135, 980, 205, 76), "HF GPU A10G", palette=VIOLET, title_size=19)
	    # Hub boxes.
	    product_repo = c.box(Rect(1625, 360, 265, 86), "Product Space Repo", "polyguard-openenv", palette=AMBER, title_size=21)
	    training_repo = c.box(Rect(1975, 360, 240, 86), "Training Space Repo", palette=AMBER, title_size=21)
	    artifact_repo = c.box(Rect(1625, 610, 265, 86), "Artifact Repo", "adapters / reports", palette=AMBER, title_size=21)
	    evidence_repo = c.box(Rect(1975, 610, 240, 86), "Evidence Space", palette=AMBER, title_size=21)
	    docs = c.box(Rect(1780, 850, 275, 86), "Local docs/results", "pulled evidence", palette=SLATE, title_size=21)
	    # The repo box fans out to each local service box.
	    for target in [local_runtime, local_env, vite, checks]:
	        arrow(c.draw, [anchor(repo, "bottom"), anchor(target, "top")], color="#2563eb")
	    arrow(c.draw, [anchor(repo, "right"), anchor(space_bundle, "left")], color="#0f766e", label="deploy product")
	    arrow(c.draw, [anchor(repo, "right"), (745, 885), anchor(train_bundle, "left")], color="#7c3aed", label="deploy training")
	    arrow(c.draw, [anchor(space_bundle, "right"), anchor(product_repo, "left")], color="#0f766e")
	    arrow(c.draw, [anchor(space_bundle, "bottom"), anchor(product_runtime, "top")], color="#0f766e")
	    arrow(c.draw, [anchor(space_bundle, "bottom"), anchor(product_ui, "top")], color="#0f766e")
	    arrow(c.draw, [anchor(train_bundle, "right"), anchor(training_repo, "left")], color="#7c3aed")
	    arrow(c.draw, [anchor(train_bundle, "bottom"), anchor(runner, "top")], color="#7c3aed")
	    arrow(c.draw, [anchor(runner, "right"), anchor(gpu, "left")], color="#7c3aed")
	    arrow(c.draw, [anchor(runner, "right"), anchor(artifact_repo, "left")], color="#b45309", label="upload")
	    arrow(c.draw, [anchor(artifact_repo, "right"), anchor(evidence_repo, "left")], color="#b45309")
	    arrow(c.draw, [anchor(artifact_repo, "bottom"), anchor(docs, "top")], color="#64748b", label="pull")
	    return c.save("deployment_topology")


	def evidence_generation_flow() -> Path:
	    """Render the evidence generation flow diagram and return the PNG path."""
	    c = Chart(2300, 980, "Evidence Generation Flow", "Training outputs are converted into reviewer-facing reports, plots, bundles, and README claims.")
	    # Boxes, roughly left to right.
	    train = c.box(Rect(100, 435, 250, 96), "SFT / GRPO Runs", palette=VIOLET)
	    reports = c.box(Rect(465, 320, 260, 90), "Run Reports", palette=AMBER)
	    checkpoints = c.box(Rect(465, 560, 260, 90), "Adapters + Merged Artifacts", palette=AMBER, title_size=22)
	    pull = c.box(Rect(850, 435, 260, 96), "Pull Training Artifacts", palette=BLUE)
	    post = c.box(Rect(1250, 260, 290, 90), "Post-Save Inference", palette=TEAL)
	    ablations = c.box(Rect(1250, 435, 290, 90), "Policy-Stack Ablations", palette=TEAL)
	    benchmarks = c.box(Rect(1250, 610, 290, 90), "Benchmarks + Robustness", palette=TEAL)
	    charts = c.box(Rect(1655, 435, 210, 90), "Charts", palette=ROSE)
	    results = c.box(Rect(1955, 320, 260, 80), "docs/results", palette=SLATE, title_size=22)
	    bundle = c.box(Rect(1955, 455, 260, 80), "Submission Bundle", palette=SLATE, title_size=22)
	    readme = c.box(Rect(1955, 590, 260, 80), "README Claims", palette=SLATE, title_size=22)
	    arrow(c.draw, [anchor(train, "right"), anchor(reports, "left")], color="#b45309")
	    arrow(c.draw, [anchor(train, "right"), anchor(checkpoints, "left")], color="#b45309")
	    arrow(c.draw, [anchor(reports, "right"), anchor(pull, "left")], color="#2563eb")
	    arrow(c.draw, [anchor(checkpoints, "right"), anchor(pull, "left")], color="#2563eb")
	    # The pull box fans out to three evaluation boxes, which all feed the charts box.
	    for target in [post, ablations, benchmarks]:
	        arrow(c.draw, [anchor(pull, "right"), anchor(target, "left")], color="#0f766e")
	        arrow(c.draw, [anchor(target, "right"), anchor(charts, "left")], color="#e11d48")
	    arrow(c.draw, [anchor(charts, "right"), anchor(results, "left")], color="#64748b")
	    arrow(c.draw, [anchor(charts, "right"), anchor(bundle, "left")], color="#64748b")
	    arrow(c.draw, [anchor(charts, "right"), anchor(readme, "left")], color="#64748b")
	    return c.save("evidence_generation_flow")


	def frontend_runtime_surface() -> Path:
	    """Render the frontend runtime surface diagram and return the PNG path.

	    Three columns are drawn: page boxes, endpoint cards and backend runtime
	    boxes. Each page box is connected to the endpoint card at the same
	    list position.
	    """
	    c = Chart(2300, 1350, "Frontend Runtime Surface", "React pages map to concrete FastAPI endpoints used by the Patient Workbench and supporting views.")
	    pages_group = Rect(100, 245, 560, 965)
	    api_group = Rect(780, 245, 720, 965)
	    runtime_group = Rect(1640, 245, 560, 965)
	    c.group(pages_group, "React App Pages", BLUE)
	    c.group(api_group, "API Endpoints", TEAL)
	    c.group(runtime_group, "Backend Runtime", VIOLET)
	    app = c.box(Rect(230, 365, 300, 90), "React App", palette=BLUE)
	    pages = [
	        c.box(Rect(190, 525, 340, 78), "Patient Workbench", palette=BLUE, title_size=21),
	        c.box(Rect(190, 665, 340, 78), "Policy Lab", palette=BLUE, title_size=21),
	        c.box(Rect(190, 805, 340, 78), "Safety + Dosing Views", palette=BLUE, title_size=21),
	        c.box(Rect(190, 945, 340, 78), "Replay + Training Views", palette=BLUE, title_size=21),
	    ]
	    c.pill(Rect(190, 1090, 340, 48), "shared fetchJson client", BLUE, size=18)
	    arrow(c.draw, [anchor(app, "bottom"), anchor(pages[0], "top")], color="#2563eb", width=3)
	    for a, b in zip(pages, pages[1:]):
	        arrow(c.draw, [anchor(a, "bottom"), anchor(b, "top")], color="#2563eb", width=3)

	    # Endpoint cards; body text uses explicit newlines, one endpoint per line.
	    endpoint_cards = [
	        c.box(
	            Rect(900, 485, 470, 116),
	            "Session + Step",
	            "POST /env/reset\nPOST /env/step_candidate",
	            palette=TEAL,
	            title_size=22,
	            body_size=18,
	        ),
	        c.box(
	            Rect(900, 655, 470, 132),
	            "Policy + Safety",
	            "POST /agents/orchestrate\nGET /env/reward_breakdown\nGET /policy/model_status",
	            palette=TEAL,
	            title_size=22,
	            body_size=17,
	        ),
	        c.box(
	            Rect(900, 850, 470, 116),
	            "Evaluation",
	            "POST /eval/run_baselines\nPOST /eval/run_dosing",
	            palette=TEAL,
	            title_size=22,
	            body_size=18,
	        ),
	        c.box(
	            Rect(900, 1035, 470, 116),
	            "Trace + Metrics",
	            "GET /env/trace\nGET /metrics/training",
	            palette=TEAL,
	            title_size=22,
	            body_size=18,
	        ),
	    ]
	    # Pair pages with endpoint cards by position.
	    for page, endpoint in zip(pages, endpoint_cards):
	        arrow(c.draw, [anchor(page, "right"), anchor(endpoint, "left")], color="#0f766e", width=3)
	    c.pill(Rect(945, 1170, 380, 48), "all calls use API_BASE", TEAL, size=18)

	    # Backend column: a top-to-bottom chain fed from the endpoint group's right edge.
	    api = c.box(Rect(1785, 395, 270, 96), "FastAPI API", palette=VIOLET)
	    env = c.box(Rect(1785, 610, 270, 96), "PolyGuardEnv", palette=AMBER)
	    policy = c.box(Rect(1785, 825, 270, 96), "Policy Runtime", palette=MINT)
	    evals = c.box(Rect(1785, 1040, 270, 96), "Eval + Metrics", palette=SLATE)
	    arrow(c.draw, [anchor(api_group, "right"), anchor(api, "left")], color="#7c3aed", width=4, label="fetchJson")
	    arrow(c.draw, [anchor(api, "bottom"), anchor(env, "top")], color="#b45309")
	    arrow(c.draw, [anchor(env, "bottom"), anchor(policy, "top")], color="#0891b2")
	    arrow(c.draw, [anchor(policy, "bottom"), anchor(evals, "top")], color="#64748b")
	    return c.save("frontend_runtime_surface")


	# Diagram functions in the order main() calls them; each returns the path it saved.
	RENDERERS = [
	    system_architecture,
	    runtime_step_flow,
	    data_training_pipeline,
	    multi_agent_orchestration,
	    reward_decomposition,
	    episode_state_machine,
	    deployment_topology,
	    evidence_generation_flow,
	    frontend_runtime_surface,
	]


	def main() -> None:
	    """Render every diagram, prune stale PNGs, and print the rendered paths.

	    Rendering happens before any deletion, so a failure part-way through
	    (for example a missing font) leaves the previously generated images on
	    disk. After a fully successful run, ``OUT_DIR`` contains only the PNGs
	    produced by ``RENDERERS``.
	    """
	    OUT_DIR.mkdir(parents=True, exist_ok=True)
	    rendered = [renderer() for renderer in RENDERERS]

	    # Remove leftovers from earlier runs that no renderer produces any more.
	    fresh = {path.resolve() for path in rendered}
	    for existing in OUT_DIR.glob("*.png"):
	        if existing.resolve() not in fresh:
	            existing.unlink()

	    print("rendered_diagrams:")
	    for path in rendered:
	        print(path.relative_to(ROOT))


	# Render the diagrams only when executed as a script, not when imported.
	if __name__ == "__main__":
	    main()