polyguard-openenv-workbench / polyguard-rl /scripts /render_diagram_images.py
TheJackBright's picture
Deploy GitHub root master to Space
c296d62
#!/usr/bin/env python3
"""Render polished PolyGuard architecture diagrams as individual PNG charts."""
from __future__ import annotations
import math
import textwrap
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable
from PIL import Image, ImageDraw, ImageFont
# Directory one level above the folder that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Every rendered PNG is written into this directory.
OUT_DIR = ROOT / "docs" / "assets" / "diagrams"
# Absolute font file paths handed to ImageFont.truetype.
# NOTE(review): these look like macOS system font locations — confirm the
# script is only expected to run on hosts where these files exist.
FONT_REGULAR = "/System/Library/Fonts/Supplemental/Arial.ttf"
FONT_BOLD = "/System/Library/Fonts/Supplemental/Arial Bold.ttf"
# Shared colours: canvas background, primary text, secondary text,
# default arrow colour, and box/label fill.
BG = "#f6f8fb"
INK = "#172033"
MUTED = "#64748b"
LINE = "#718096"
WHITE = "#ffffff"
# Palettes are (fill, accent) pairs; Chart.group/box/pill unpack them in that order.
BLUE = ("#eaf3ff", "#2563eb")
VIOLET = ("#f3edff", "#7c3aed")
TEAL = ("#e8f8f2", "#0f766e")
AMBER = ("#fff5df", "#b45309")
ROSE = ("#fff1f3", "#e11d48")
SLATE = ("#eef2f7", "#475569")
MINT = ("#e7f7fb", "#0891b2")
@dataclass(frozen=True)
class Rect:
    """Immutable rectangle described by its top-left corner and its size."""

    x: int  # left edge
    y: int  # top edge
    w: int  # width; the right edge is x + w
    h: int  # height; the bottom edge is y + h
# Faces already loaded, keyed by (size, bold), so each one is read from disk once.
_FONT_CACHE: dict[tuple[int, bool], "ImageFont.FreeTypeFont"] = {}

# File names tried, in order, when the configured Arial path cannot be opened.
# Bare names let Pillow search the platform's standard font directories.
_FALLBACK_FONT_FILES: dict[bool, tuple[str, ...]] = {
    False: ("Arial.ttf", "arial.ttf", "DejaVuSans.ttf", "LiberationSans-Regular.ttf"),
    True: ("Arial Bold.ttf", "arialbd.ttf", "DejaVuSans-Bold.ttf", "LiberationSans-Bold.ttf"),
}


def font(size: int, bold: bool = False) -> ImageFont.FreeTypeFont:
    """Return a scalable font of the requested pixel size.

    The configured ``FONT_REGULAR`` / ``FONT_BOLD`` file is preferred. When it
    cannot be opened, a short list of common sans-serif files is tried, and as
    a last resort Pillow's bundled default face is used (regular weight only).
    Loaded faces are cached, so repeated calls with the same arguments return
    the same object instead of re-reading the font file.

    Args:
        size: Font size passed to Pillow.
        bold: Select the bold face when true.

    Returns:
        The loaded font object.

    Raises:
        OSError: If no candidate file and no default face could be loaded.
    """
    key = (size, bool(bold))
    cached = _FONT_CACHE.get(key)
    if cached is not None:
        return cached

    preferred = FONT_BOLD if bold else FONT_REGULAR
    candidates = (preferred, *_FALLBACK_FONT_FILES[bool(bold)])
    loaded = None
    for candidate in candidates:
        try:
            loaded = ImageFont.truetype(candidate, size)
        except OSError:
            # Missing or unreadable file: move on to the next candidate.
            continue
        break

    if loaded is None:
        try:
            # Pillow >= 10.1 can scale its bundled default face; older
            # releases reject the size argument with TypeError.
            loaded = ImageFont.load_default(size)
        except (TypeError, OSError) as exc:
            tried = ", ".join(candidates)
            raise OSError(f"no usable TrueType font found; tried: {tried}") from exc

    _FONT_CACHE[key] = loaded
    return loaded
def text_size(draw: ImageDraw.ImageDraw, text: str, fnt: ImageFont.FreeTypeFont) -> tuple[int, int]:
    """Measure ``text`` rendered with ``fnt`` and return ``(width, height)``.

    Empty text is reported as ``(0, 0)`` without consulting the drawing context.
    """
    if text:
        left, top, right, bottom = draw.textbbox((0, 0), text, font=fnt)
        return right - left, bottom - top
    return 0, 0
def wrap_lines(draw: ImageDraw.ImageDraw, text: str, fnt: ImageFont.FreeTypeFont, max_width: int) -> list[str]:
    """Greedily wrap ``text`` so each rendered line fits within ``max_width``.

    Explicit newlines in ``text`` are honoured, and a blank paragraph yields an
    empty line. Words are packed onto a line while the measured width stays
    within ``max_width``. A single word that is too wide on its own is split
    into character chunks wherever it occurs, not only at the start of a line.

    Args:
        draw: Drawing context used to measure text.
        text: Text to wrap; may contain ``"\\n"``.
        fnt: Font used for measuring; its ``size`` drives the chunk width.
        max_width: Maximum rendered line width.

    Returns:
        The wrapped lines in order.
    """
    lines: list[str] = []
    for part in text.split("\n"):
        if not part.strip():
            lines.append("")
            continue
        current = ""
        for word in part.split():
            candidate = f"{current} {word}" if current else word
            if text_size(draw, candidate, fnt)[0] <= max_width:
                current = candidate
                continue
            if current:
                # The line is full: flush it, then place the word on a fresh line.
                lines.append(current)
                current = ""
                if text_size(draw, word, fnt)[0] <= max_width:
                    current = word
                    continue
            # The word alone is wider than the line: split it by an approximate
            # characters-per-line budget (at least 8 characters per chunk).
            chunk_chars = max(8, int(max_width // max(1, fnt.size)))
            chunks = textwrap.wrap(word, width=chunk_chars)
            lines.extend(chunks[:-1])
            current = chunks[-1] if chunks else word
        if current:
            lines.append(current)
    return lines
def draw_centered_lines(
    draw: ImageDraw.ImageDraw,
    lines: Iterable[str],
    fnt: ImageFont.FreeTypeFont,
    x: int,
    y: int,
    w: int,
    fill: str = INK,
    line_gap: int = 7,
) -> int:
    """Draw ``lines`` top to bottom, each centred in the span ``[x, x + w]``.

    Drawing starts at ``y``; after every line the cursor advances by that
    line's measured height plus ``line_gap``.

    Returns:
        The y coordinate immediately after the last line, trailing gap included.
    """
    cursor = y
    for row in lines:
        row_w, row_h = text_size(draw, row, fnt)
        origin = (x + (w - row_w) / 2, cursor)
        draw.text(origin, row, font=fnt, fill=fill)
        cursor = cursor + row_h + line_gap
    return cursor
def rounded(
    draw: ImageDraw.ImageDraw,
    rect: Rect,
    fill: str,
    outline: str = "#d5deea",
    width: int = 2,
    radius: int = 22,
    shadow: bool = True,
) -> None:
    """Draw ``rect`` as a rounded rectangle, optionally over a drop shadow.

    The shadow is the same shape shifted 8 px right and 10 px down, painted
    first so the outlined rectangle sits on top of it.
    """
    left, top = rect.x, rect.y
    right, bottom = left + rect.w, top + rect.h
    if shadow:
        offset_box = (left + 8, top + 10, right + 8, bottom + 10)
        draw.rounded_rectangle(offset_box, radius=radius, fill="#dfe6ef")
    draw.rounded_rectangle(
        (left, top, right, bottom),
        radius=radius,
        fill=fill,
        outline=outline,
        width=width,
    )
def anchor(rect: Rect, side: str) -> tuple[int, int]:
    """Return the connection point on ``rect`` for the given ``side``.

    ``"top"``, ``"bottom"``, ``"left"`` and ``"right"`` give the midpoint of
    that edge; any other value gives the centre of the rectangle.
    """
    mid_x = rect.x + rect.w // 2
    mid_y = rect.y + rect.h // 2
    edge_points = {
        "top": (mid_x, rect.y),
        "bottom": (mid_x, rect.y + rect.h),
        "left": (rect.x, mid_y),
        "right": (rect.x + rect.w, mid_y),
    }
    return edge_points.get(side, (mid_x, mid_y))
def arrow(
    draw: ImageDraw.ImageDraw,
    points: list[tuple[int, int]],
    color: str = LINE,
    width: int = 4,
    label: str | None = None,
    label_offset: tuple[int, int] = (0, -26),
) -> None:
    """Draw a polyline through ``points`` with an arrowhead at the final point.

    When ``label`` is given, it is drawn on a white rounded badge positioned at
    the midpoint of the last segment, shifted by ``label_offset``. With fewer
    than two points only the line call is made.
    """
    draw.line(points, fill=color, width=width, joint="curve")
    if len(points) < 2:
        return

    # The arrowhead direction follows the last segment only.
    (tail_x, tail_y), (tip_x, tip_y) = points[-2], points[-1]
    heading = math.atan2(tip_y - tail_y, tip_x - tail_x)
    head_len = 17
    spread = math.pi / 7
    barbs = [
        (tip_x - head_len * math.cos(heading + delta), tip_y - head_len * math.sin(heading + delta))
        for delta in (-spread, spread)
    ]
    draw.polygon([(tip_x, tip_y), *barbs], fill=color)

    if not label:
        return

    badge_x = (tail_x + tip_x) // 2 + label_offset[0]
    badge_y = (tail_y + tip_y) // 2 + label_offset[1]
    label_font = font(22, bold=True)
    text_w, text_h = text_size(draw, label, label_font)
    half_w = text_w / 2
    pad_x, pad_y = 12, 6
    badge_box = (
        badge_x - half_w - pad_x,
        badge_y - pad_y,
        badge_x + half_w + pad_x,
        badge_y + text_h + pad_y,
    )
    draw.rounded_rectangle(badge_box, radius=12, fill=WHITE, outline="#dbe3ee")
    draw.text((badge_x - half_w, badge_y), label, font=label_font, fill=color)
class Chart:
    """One diagram canvas plus helpers for headings, groups, boxes and pills.

    The canvas is an RGB image filled with ``BG``; ``draw`` is the drawing
    context bound to it. Every helper paints immediately, so call order is
    paint order.
    """

    def __init__(self, width: int, height: int, title: str, subtitle: str = "") -> None:
        """Create a ``width`` x ``height`` canvas and paint its heading."""
        self.width, self.height = width, height
        canvas = Image.new("RGB", (width, height), BG)
        self.image = canvas
        self.draw = ImageDraw.Draw(canvas)
        self.title(title, subtitle)

    def title(self, title: str, subtitle: str = "") -> None:
        """Paint the top accent bar, the chart title and an optional subtitle."""
        pen = self.draw
        pen.rectangle((0, 0, self.width, 14), fill="#1d4ed8")
        pen.text((76, 54), title, font=font(58, bold=True), fill=INK)
        if not subtitle:
            return
        pen.text((78, 126), subtitle, font=font(26), fill=MUTED)

    def group(self, rect: Rect, title: str, palette: tuple[str, str]) -> None:
        """Paint a container panel with a coloured header strip and white title."""
        panel_fill, header_fill = palette
        left, top = rect.x, rect.y
        right = left + rect.w
        rounded(self.draw, rect, fill=panel_fill, outline="#cbd5e1", width=2, radius=30, shadow=False)
        # Rounded header strip, then a plain rectangle that squares off its lower corners.
        self.draw.rounded_rectangle((left, top, right, top + 64), radius=30, fill=header_fill)
        self.draw.rectangle((left, top + 34, right, top + 64), fill=header_fill)
        self.draw.text((left + 26, top + 18), title, font=font(26, bold=True), fill=WHITE)

    def box(
        self,
        rect: Rect,
        title: str,
        body: str = "",
        palette: tuple[str, str] = SLATE,
        title_size: int = 25,
        body_size: int = 20,
        center: bool = True,
    ) -> Rect:
        """Paint a white card with an accent stripe, a bold title and optional body.

        With ``center`` true the text block is centred horizontally and
        vertically (never closer than 18 px to the top); otherwise it is
        left-aligned starting 20 px below the top edge.

        Returns:
            ``rect`` unchanged, so callers can keep it for anchoring arrows.
        """
        _, accent = palette
        pen = self.draw
        rounded(pen, rect, fill=WHITE, outline="#cbd5e1", width=2, radius=20, shadow=True)
        pen.rounded_rectangle((rect.x, rect.y, rect.x + 10, rect.y + rect.h), radius=20, fill=accent)

        title_font = font(title_size, bold=True)
        body_font = font(body_size)
        inner_w = rect.w - 46
        title_lines = wrap_lines(pen, title, title_font, inner_w)
        body_lines = wrap_lines(pen, body, body_font, inner_w) if body else []

        def stacked_height(rows: list[str], face: ImageFont.FreeTypeFont, spacing: int) -> int:
            # Sum of measured line heights plus the gaps between consecutive lines.
            heights = [text_size(pen, row, face)[1] for row in rows]
            return sum(heights) + max(0, len(rows) - 1) * spacing

        title_h = stacked_height(title_lines, title_font, 7)
        body_h = stacked_height(body_lines, body_font, 6)
        gap = 10 if body_lines else 0

        if center:
            block_h = title_h + body_h + gap
            cursor = rect.y + max(18, (rect.h - block_h) // 2)
            cursor = draw_centered_lines(pen, title_lines, title_font, rect.x + 22, cursor, inner_w, INK)
            if body_lines:
                draw_centered_lines(pen, body_lines, body_font, rect.x + 22, cursor + gap, inner_w, MUTED, line_gap=6)
            return rect

        cursor = rect.y + 20
        pen.multiline_text((rect.x + 28, cursor), "\n".join(title_lines), font=title_font, fill=INK, spacing=7)
        if body_lines:
            body_top = cursor + title_h + gap
            pen.multiline_text((rect.x + 28, body_top), "\n".join(body_lines), font=body_font, fill=MUTED, spacing=6)
        return rect

    def pill(self, rect: Rect, text: str, palette: tuple[str, str], size: int = 21) -> Rect:
        """Paint a capsule-shaped label with bold centred text and return ``rect``."""
        fill, accent = palette
        self.draw.rounded_rectangle(
            (rect.x, rect.y, rect.x + rect.w, rect.y + rect.h),
            radius=rect.h // 2,
            fill=fill,
            outline=accent,
            width=2,
        )
        face = font(size, bold=True)
        inner_w = rect.w - 28
        lines = wrap_lines(self.draw, text, face, inner_w)
        # Vertical centring uses an estimated line height of size + 6, not a measurement.
        estimated_h = len(lines) * (size + 6)
        top = rect.y + (rect.h - estimated_h) // 2
        draw_centered_lines(self.draw, lines, face, rect.x + 14, top, inner_w, INK, line_gap=4)
        return rect

    def save(self, name: str) -> Path:
        """Write the canvas to ``OUT_DIR/<name>.png`` and return that path."""
        OUT_DIR.mkdir(parents=True, exist_ok=True)
        target = OUT_DIR / f"{name}.png"
        self.image.save(target, quality=96)
        return target
def system_architecture() -> Path:
    """Render the system architecture chart and return the saved PNG path.

    The 2400x1500 canvas holds five group panels (client surfaces, API, agent
    stack, runtime, assets), the boxes inside each, arrows within each panel,
    and labelled arrows between panels. Statements run in paint order: later
    shapes are drawn over earlier ones.
    """
    c = Chart(
        2400,
        1500,
        "PolyGuard System Architecture",
        "Research environment, policy stack, OpenEnv runtime, model artifacts, and evidence outputs.",
    )
    # Panel rectangles: three full-width rows, then two half-width panels side by side.
    clients = Rect(110, 190, 2180, 190)
    api = Rect(110, 440, 2180, 190)
    agents = Rect(110, 690, 2180, 260)
    runtime = Rect(110, 1015, 1050, 300)
    assets = Rect(1240, 1015, 1050, 300)
    for rect, title, pal in [
        (clients, "User And Integration Surfaces", BLUE),
        (api, "API And OpenEnv Surface", VIOLET),
        (agents, "Multi-Agent Policy Stack", TEAL),
        (runtime, "OpenEnv Runtime And Rewards", AMBER),
        (assets, "Data, Models, And Evidence Outputs", MINT),
    ]:
        c.group(rect, title, pal)
    # NOTE(review): client_boxes is not referenced again in this function; the
    # list exists only for the drawing side effect of c.box.
    client_boxes = [
        c.box(Rect(185, 275, 390, 78), "React Patient Workbench", palette=BLUE),
        c.box(Rect(665, 275, 350, 78), "Public HF Space", palette=BLUE),
        c.box(Rect(1105, 275, 380, 78), "One-Run Notebook / CLI", palette=BLUE),
        c.box(Rect(1575, 275, 365, 78), "OpenEnv Validator", palette=BLUE),
    ]
    api_boxes = [
        c.box(Rect(260, 530, 330, 72), "app/api/routes.py", palette=VIOLET, title_size=21),
        c.box(Rect(720, 530, 300, 72), "APIService", palette=VIOLET, title_size=21),
        c.box(Rect(1180, 530, 370, 72), "PolicyProviderRouter", palette=VIOLET, title_size=21),
        c.box(Rect(1680, 530, 350, 72), "app/env/fastapi_app.py", palette=VIOLET, title_size=20),
    ]
    # Two rows of agent boxes spaced 405 px apart; the bottom row starts 205 px further right.
    top_agents = [
        c.box(Rect(215 + i * 405, 780, 285, 60), name, palette=TEAL, title_size=19)
        for i, name in enumerate(["MedRec", "Evidence", "Graph Safety", "Dosing", "Candidate"])
    ]
    # NOTE(review): the last bottom-row box spans x 2040..2325, past the agents
    # panel's right edge at 2290 — confirm the overhang is intended.
    bottom_agents = [
        c.box(Rect(420 + i * 405, 865, 285, 60), name, palette=TEAL if name != "Contextual Bandit" else AMBER, title_size=19)
        for i, name in enumerate(["Supervisor", "Planner", "Contextual Bandit", "Critic", "Explainer"])
    ]
    runtime_boxes = [
        c.box(Rect(185, 1110, 300, 78), "PolyGuardEnv", "stateful reset / step", palette=AMBER, title_size=22),
        c.box(Rect(545, 1110, 240, 78), "Verifier", "legality gates", palette=AMBER, title_size=21),
        c.box(Rect(845, 1110, 240, 78), "Reward Router", "13 components", palette=AMBER, title_size=21),
        c.box(Rect(345, 1215, 240, 66), "Transition", palette=AMBER, title_size=20),
        c.box(Rect(645, 1215, 240, 66), "Anti-Cheat", palette=AMBER, title_size=20),
    ]
    asset_boxes = [
        c.box(Rect(1305, 1100, 215, 68), "Scenarios", palette=MINT, title_size=20),
        c.box(Rect(1560, 1100, 250, 68), "Drug Knowledge", palette=MINT, title_size=20),
        c.box(Rect(1850, 1100, 250, 68), "Active Qwen", "adapter / merged", palette=MINT, title_size=20),
        c.box(Rect(1305, 1215, 215, 68), "Retrieval Index", palette=MINT, title_size=20),
        c.box(Rect(1560, 1215, 250, 68), "Evaluation Suites", palette=MINT, title_size=20),
        c.box(Rect(1850, 1215, 250, 68), "docs/results", "charts + reports", palette=MINT, title_size=20),
    ]
    # Left-to-right chains inside the API panel and both agent rows.
    for a, b in zip(api_boxes, api_boxes[1:]):
        arrow(c.draw, [anchor(a, "right"), anchor(b, "left")], color="#7c3aed")
    for a, b in zip(top_agents, top_agents[1:]):
        arrow(c.draw, [anchor(a, "right"), anchor(b, "left")], color="#0f766e", width=3)
    # Painted after the bottom-row boxes and before their connecting arrows.
    c.pill(Rect(990, 845, 420, 44), "routed planning and critique", TEAL, size=18)
    for a, b in zip(bottom_agents, bottom_agents[1:]):
        arrow(c.draw, [anchor(a, "right"), anchor(b, "left")], color="#0f766e", width=3)
    # Runtime panel: top-row chain plus two drops to the second row.
    arrow(c.draw, [anchor(runtime_boxes[0], "right"), anchor(runtime_boxes[1], "left")], color="#b45309")
    arrow(c.draw, [anchor(runtime_boxes[1], "right"), anchor(runtime_boxes[2], "left")], color="#b45309")
    arrow(c.draw, [anchor(runtime_boxes[0], "bottom"), anchor(runtime_boxes[3], "top")], color="#b45309")
    arrow(c.draw, [anchor(runtime_boxes[2], "bottom"), anchor(runtime_boxes[4], "top")], color="#b45309")
    # Assets panel: one chain per row.
    arrow(c.draw, [anchor(asset_boxes[0], "right"), anchor(asset_boxes[1], "left")], color="#0891b2")
    arrow(c.draw, [anchor(asset_boxes[1], "right"), anchor(asset_boxes[2], "left")], color="#0891b2")
    arrow(c.draw, [anchor(asset_boxes[3], "right"), anchor(asset_boxes[4], "left")], color="#475569")
    arrow(c.draw, [anchor(asset_boxes[4], "right"), anchor(asset_boxes[5], "left")], color="#475569")
    # Labelled connectors between panels, at fixed coordinates in the gaps.
    arrow(c.draw, [(1200, 380), (1200, 440)], color="#3b82f6", label="requests")
    arrow(c.draw, [(1200, 630), (1200, 690)], color="#7c3aed", label="orchestrates")
    arrow(c.draw, [(760, 950), (760, 1015)], color="#0f766e", label="safe action")
    arrow(c.draw, [(1725, 950), (1725, 1015)], color="#0891b2", label="model + evidence")
    arrow(c.draw, [(1160, 1165), (1240, 1165)], color="#64748b", label="reports")
    return c.save("system_architecture")
def runtime_step_flow() -> Path:
    """Render the sequence-style runtime step flow chart and return the PNG path.

    Seven actor boxes sit along the top, each with a vertical lifeline.
    Horizontal labelled arrows between lifelines represent messages, followed
    by a response box and a return arrow.
    """
    c = Chart(2400, 1320, "Runtime Step Flow", "How one reset or action moves through UI, API, policy, environment, and reward scoring.")
    # (display name, left x of the actor box, palette)
    actors = [
        ("User", 130, BLUE),
        ("React Workbench", 430, BLUE),
        ("FastAPI APIService", 760, VIOLET),
        ("Orchestrator", 1100, TEAL),
        ("PolyGuardEnv", 1440, AMBER),
        ("Policy Provider", 1780, MINT),
        ("Reward Router", 2090, ROSE),
    ]
    # Maps actor name to the x coordinate of its lifeline (the box's horizontal centre).
    x_positions: dict[str, int] = {}
    for name, x, pal in actors:
        rect = c.box(Rect(x, 210, 220, 82), name, palette=pal, title_size=22)
        x_positions[name] = rect.x + rect.w // 2
        c.draw.line((x_positions[name], 315, x_positions[name], 1185), fill="#d0d9e6", width=3)
    def msg(y: int, src: str, dst: str, label: str, color: str = LINE) -> None:
        """Draw a labelled horizontal arrow at height ``y`` from ``src``'s lifeline to ``dst``'s."""
        sx, dx = x_positions[src], x_positions[dst]
        arrow(c.draw, [(sx, y), (dx, y)], color=color, width=4, label=label, label_offset=(0, -34))
    msg(390, "User", "React Workbench", "reset / run")
    msg(500, "React Workbench", "FastAPI APIService", "POST /env/reset")
    msg(610, "FastAPI APIService", "PolyGuardEnv", "reset(seed, task)", "#b45309")
    msg(720, "PolyGuardEnv", "FastAPI APIService", "observation + candidates", "#b45309")
    msg(830, "React Workbench", "FastAPI APIService", "step_candidate or orchestrate")
    msg(940, "FastAPI APIService", "Orchestrator", "agent path", "#0f766e")
    msg(1050, "Orchestrator", "Policy Provider", "optional Qwen selection", "#0891b2")
    msg(1160, "Orchestrator", "PolyGuardEnv", "final action", "#0f766e")
    # NOTE(review): this message is drawn at y=1020, above the 1050 and 1160
    # rows issued before it — confirm the vertical position is intended.
    msg(1020, "PolyGuardEnv", "Reward Router", "13 components -> 4 channels", "#e11d48")
    # Response box is painted after the message arrows, so it covers anything beneath it.
    c.box(Rect(1290, 1160, 430, 90), "Response", "observation, reward, done, trace, info", palette=SLATE)
    arrow(c.draw, [(1440 + 110, 1120), (1505, 1160)], color="#64748b")
    # Return path: from the response box's left edge, left to x=650, then up to y=900.
    arrow(c.draw, [(1290, 1205), (650, 1205), (650, 900)], color="#64748b", label="render updated panels", label_offset=(0, 16))
    return c.save("runtime_step_flow")
def data_training_pipeline() -> Path:
    """Render the data and training pipeline chart and return the PNG path.

    Four column panels (Sources, DataOps, Post-Training, Validation And Use)
    each hold a vertical stack of boxes. Arrows chain the boxes inside each
    column and link the columns left to right.
    """
    c = Chart(2400, 1320, "Data And Training Pipeline", "From local knowledge and synthetic cases to SFT, GRPO, activation, and inference.")
    groups = [
        (Rect(90, 220, 430, 880), "Sources", BLUE),
        (Rect(610, 220, 520, 880), "DataOps", TEAL),
        (Rect(1220, 220, 520, 880), "Post-Training", VIOLET),
        (Rect(1830, 220, 480, 880), "Validation And Use", AMBER),
    ]
    for rect, title, pal in groups:
        c.group(rect, title, pal)
    sources = [
        c.box(Rect(150, 325, 310, 76), "Local drug knowledge", palette=BLUE, title_size=21),
        c.box(Rect(150, 435, 310, 76), "Synthetic patients", palette=BLUE, title_size=21),
        c.box(Rect(150, 545, 310, 76), "Scenario files", "easy / medium / hard", palette=BLUE, title_size=21),
        c.box(Rect(150, 655, 310, 76), "Optional HF data", palette=BLUE, title_size=21),
        c.box(Rect(150, 765, 310, 76), "DDI API", "optional", palette=BLUE, title_size=21),
        c.box(Rect(150, 875, 310, 76), "Web fallback", "optional", palette=BLUE, title_size=21),
    ]
    dataops = [
        c.box(Rect(700, 330, 340, 78), "Normalize drugs", palette=TEAL, title_size=22),
        c.box(Rect(700, 465, 340, 78), "Build knowledge graph", palette=TEAL, title_size=22),
        c.box(Rect(700, 600, 340, 78), "Build retrieval index", palette=TEAL, title_size=22),
        c.box(Rect(700, 735, 340, 78), "Build scenarios", palette=TEAL, title_size=22),
        c.box(Rect(700, 870, 340, 90), "Build SFT / GRPO corpus", palette=TEAL, title_size=22),
    ]
    training = [
        c.box(Rect(1310, 345, 340, 86), "TRL SFT adapter", palette=VIOLET, title_size=22),
        c.box(Rect(1310, 505, 340, 86), "TRL GRPO", "environment reward", palette=VIOLET, title_size=22),
        c.box(Rect(1310, 665, 340, 86), "Merge / export adapters", palette=VIOLET, title_size=22),
        c.box(Rect(1310, 825, 340, 86), "Registry + manifests", palette=VIOLET, title_size=22),
    ]
    validation = [
        c.box(Rect(1905, 345, 310, 86), "Post-save inference", palette=AMBER, title_size=22),
        c.box(Rect(1905, 505, 310, 86), "Activate model", palette=AMBER, title_size=22),
        c.box(Rect(1905, 665, 310, 86), "/policy/model_status", palette=AMBER, title_size=21),
        c.box(Rect(1905, 825, 310, 86), "/policy/infer", palette=AMBER, title_size=21),
    ]
    # Every source fans into the LAST DataOps box (dataops[-1]).
    # NOTE(review): confirm the target should be the last stage rather than the first.
    for src in sources:
        arrow(c.draw, [anchor(src, "right"), anchor(dataops[-1], "left")], color="#3b82f6", width=3)
    # Top-to-bottom chain within DataOps, then hand-off to the first training box.
    for a, b in zip(dataops, dataops[1:]):
        arrow(c.draw, [anchor(a, "bottom"), anchor(b, "top")], color="#0f766e")
    arrow(c.draw, [anchor(dataops[-1], "right"), anchor(training[0], "left")], color="#7c3aed", label="corpus")
    for a, b in zip(training, training[1:]):
        arrow(c.draw, [anchor(a, "bottom"), anchor(b, "top")], color="#7c3aed")
    arrow(c.draw, [anchor(training[-1], "right"), anchor(validation[0], "left")], color="#b45309", label="artifact")
    for a, b in zip(validation, validation[1:]):
        arrow(c.draw, [anchor(a, "bottom"), anchor(b, "top")], color="#b45309")
    return c.save("data_training_pipeline")
def multi_agent_orchestration() -> Path:
    """Render the multi-agent orchestration chart and return the PNG path.

    Three panels are drawn left to right: a vertical candidate-construction
    chain, a policy-control cluster with a veto loop, and a step-closure chain.
    A multi-segment arrow returns from the last closure box to the bandit box.
    """
    c = Chart(2400, 1250, "Multi-Agent Orchestration", "Specialized agents build a verified candidate, route it through policy control, then close the loop with reward feedback.")
    input_group = Rect(90, 250, 560, 850)
    decision_group = Rect(760, 250, 780, 850)
    closure_group = Rect(1650, 250, 660, 850)
    for rect, title, pal in [
        (input_group, "Candidate Construction", BLUE),
        (decision_group, "Policy Control", VIOLET),
        (closure_group, "Step Closure", AMBER),
    ]:
        c.group(rect, title, pal)
    # Left panel: vertical chain ending in the "Candidate" box.
    inputs = [
        c.box(Rect(210, 360, 320, 72), "State", palette=BLUE, title_size=22),
        c.box(Rect(210, 465, 320, 72), "MedRec", palette=TEAL, title_size=22),
        c.box(Rect(210, 570, 320, 72), "Evidence", palette=TEAL, title_size=22),
        c.box(Rect(210, 675, 320, 72), "Graph Safety", palette=TEAL, title_size=22),
        c.box(Rect(210, 780, 320, 72), "Dosing", palette=TEAL, title_size=22),
        c.box(Rect(210, 885, 320, 82), "Candidate", "legal action set", palette=TEAL, title_size=22),
    ]
    for a, b in zip(inputs, inputs[1:]):
        arrow(c.draw, [anchor(a, "bottom"), anchor(b, "top")], color="#0f766e", width=3)
    # Middle panel boxes.
    supervisor = c.box(Rect(1020, 360, 260, 82), "Supervisor", "routes context", palette=TEAL, title_size=22)
    bandit = c.box(Rect(850, 525, 250, 82), "Bandit Top-K", "policy shortlist", palette=AMBER, title_size=22)
    planner = c.box(Rect(1190, 525, 250, 82), "Planner", "drafts action", palette=VIOLET, title_size=22)
    critic = c.box(Rect(1190, 700, 250, 82), "Critic", "checks action", palette=ROSE, title_size=22)
    replan = c.box(Rect(850, 700, 250, 82), "Review / Replan", "on veto", palette=ROSE, title_size=22)
    c.pill(Rect(850, 915, 590, 74), "coordination: supervisor routing | veto loop | lightweight debate", SLATE, size=21)
    # Right panel boxes.
    env_step = c.box(Rect(1855, 370, 250, 84), "Env Step", "apply transition", palette=AMBER, title_size=22)
    explainer = c.box(Rect(1855, 540, 250, 84), "Explainer", "grounded rationale", palette=TEAL, title_size=22)
    reward = c.box(Rect(1855, 710, 250, 84), "Reward + Trace", "step feedback", palette=SLATE, title_size=22)
    update = c.box(Rect(1855, 880, 250, 84), "Bandit Update", "learn from reward", palette=AMBER, title_size=22)
    # Candidate -> Supervisor, routed through the gap between the panels at x=705.
    arrow(c.draw, [anchor(inputs[-1], "right"), (705, 926), (705, 401), anchor(supervisor, "left")], color="#2563eb", label="candidate")
    # Supervisor fans out to bandit and planner through the shared waypoint (1150, 485).
    arrow(c.draw, [anchor(supervisor, "bottom"), (1150, 485), anchor(bandit, "top")], color="#b45309")
    arrow(c.draw, [anchor(supervisor, "bottom"), (1150, 485), anchor(planner, "top")], color="#7c3aed")
    arrow(c.draw, [anchor(bandit, "right"), anchor(planner, "left")], color="#b45309")
    arrow(c.draw, [anchor(planner, "bottom"), anchor(critic, "top")], color="#7c3aed")
    # Veto loop: critic -> replan -> back to the planner's bottom edge.
    arrow(c.draw, [anchor(critic, "left"), anchor(replan, "right")], color="#e11d48", label="veto")
    arrow(c.draw, [anchor(replan, "top"), (975, 650), (1315, 650), anchor(planner, "bottom")], color="#e11d48")
    arrow(c.draw, [anchor(critic, "right"), anchor(env_step, "left")], color="#0f766e", label="approved")
    arrow(c.draw, [anchor(env_step, "bottom"), anchor(explainer, "top")], color="#0f766e")
    arrow(c.draw, [anchor(explainer, "bottom"), anchor(reward, "top")], color="#64748b")
    arrow(c.draw, [anchor(reward, "bottom"), anchor(update, "top")], color="#b45309")
    # Feedback path from "Bandit Update" back to "Bandit Top-K"; the label sits
    # on the final segment, shifted by label_offset.
    arrow(c.draw, [anchor(update, "left"), (1585, 922), (1585, 1055), (800, 1055), (800, 566), anchor(bandit, "left")], color="#b45309", label="reward learning", label_offset=(-170, 10))
    return c.save("multi_agent_orchestration")
def reward_decomposition() -> Path:
    """Render the reward decomposition chart and return the PNG path.

    An action box feeds a checks box, which fans out to four channel panels.
    Each panel lists its component names as pills, and every panel feeds a
    single ``total_reward`` box at the bottom.
    """
    c = Chart(2500, 1420, "Reward Decomposition", "Verifier-backed rewards remain inspectable through component columns and judge-friendly primary channels.")
    action = c.box(Rect(930, 210, 640, 92), "Candidate action", "selected legal candidate or fallback", palette=BLUE)
    checks = c.box(Rect(800, 360, 900, 94), "Verifier + Transition + Anti-Cheat + Uncertainty", palette=VIOLET, title_size=25)
    arrow(c.draw, [anchor(action, "bottom"), anchor(checks, "top")], color="#7c3aed")
    # One entry per channel panel: (panel rect, title, subtitle, component names, palette).
    channel_specs = [
        (
            Rect(140, 575, 500, 455),
            "safety_legality",
            "legal and safe action choice",
            ["format compliance", "candidate alignment", "legality", "safety delta"],
            ROSE,
        ),
        (
            Rect(730, 575, 500, 455),
            "clinical_improvement",
            "clinical risk moves in the right direction",
            ["burden improvement", "disease stability"],
            TEAL,
        ),
        (
            Rect(1320, 575, 500, 455),
            "dosing_quality",
            "dose-sensitive decisions are handled",
            ["dosing quality"],
            AMBER,
        ),
        (
            Rect(1910, 575, 500, 455),
            "process_integrity",
            "process, uncertainty, and anti-cheat safeguards",
            ["abstention quality", "efficiency", "process fidelity", "explanation grounding", "anti-cheat", "uncertainty calibration"],
            VIOLET,
        ),
    ]
    channels: list[Rect] = []
    for rect, title, subtitle, components, pal in channel_specs:
        c.group(rect, title, pal)
        subtitle_font = font(21)
        lines = wrap_lines(c.draw, subtitle, subtitle_font, rect.w - 64)
        c.draw.multiline_text((rect.x + 32, rect.y + 86), "\n".join(lines), font=subtitle_font, fill=MUTED, spacing=5)
        # Panels with more than four components use smaller, tighter pills.
        compact = len(components) > 4
        y = rect.y + (148 if compact else 155)
        pill_h = 42 if compact else 54
        step = 50 if compact else 66
        for item in components:
            c.pill(Rect(rect.x + 44, y, rect.w - 88, pill_h), item, pal, size=17 if compact else 19)
            y += step
        channels.append(rect)
        # Connector from the checks box, bending at y=520 above the panel's centre.
        arrow(c.draw, [anchor(checks, "bottom"), (rect.x + rect.w // 2, 520), anchor(rect, "top")], color=pal[1], width=3)
    total = c.box(Rect(930, 1230, 640, 102), "total_reward", "clamped to 0.001 - 0.999", palette=BLUE, title_size=28)
    for ch in channels:
        arrow(c.draw, [anchor(ch, "bottom"), anchor(total, "top")], color="#2563eb", width=3)
    return c.save("reward_decomposition")
def episode_state_machine() -> Path:
    """Render the episode state machine chart and return the PNG path.

    State boxes are stored by key in ``nodes``. Arrows draw the main chain,
    the legal/blocked branch after verification, the loop back to "Observe",
    and one arrow per terminal-reason pill into "Done".
    """
    c = Chart(2250, 1120, "Episode State Machine", "Terminal reasons are explicit, making rollouts auditable and reward hacking visible.")
    # Keys are short lookup names; the visible label is the box title.
    nodes = {
        "Start": c.box(Rect(100, 520, 190, 82), "Start", palette=BLUE),
        "Reset": c.box(Rect(390, 520, 190, 82), "Reset", palette=BLUE),
        "Observe": c.box(Rect(680, 520, 220, 82), "Observe", palette=TEAL),
        "Select": c.box(Rect(1020, 500, 260, 122), "Candidate Selection", palette=TEAL),
        "Verify": c.box(Rect(1420, 500, 240, 122), "Verification", palette=VIOLET),
        "Transition": c.box(Rect(1810, 395, 245, 90), "Transition", palette=TEAL),
        "Rollback": c.box(Rect(1810, 610, 245, 90), "Rollback", palette=ROSE),
        "Reward": c.box(Rect(1450, 820, 250, 96), "Reward Scoring", palette=AMBER),
        "Continue": c.box(Rect(980, 820, 250, 96), "Continue", palette=SLATE),
        "Done": c.box(Rect(1950, 820, 220, 96), "Done", palette=BLUE),
    }
    # Main left-to-right chain up to verification.
    chain = ["Start", "Reset", "Observe", "Select", "Verify"]
    for a, b in zip(chain, chain[1:]):
        arrow(c.draw, [anchor(nodes[a], "right"), anchor(nodes[b], "left")], color="#475569")
    # Verification branches: "legal" to Transition, "blocked" to Rollback.
    arrow(c.draw, [anchor(nodes["Verify"], "right"), anchor(nodes["Transition"], "left")], color="#0f766e", label="legal")
    arrow(c.draw, [anchor(nodes["Verify"], "right"), (1725, 560), anchor(nodes["Rollback"], "left")], color="#e11d48", label="blocked")
    # Both branches reach Reward via the shared waypoint (1930, 780).
    arrow(c.draw, [anchor(nodes["Transition"], "bottom"), (1930, 780), anchor(nodes["Reward"], "right")], color="#b45309")
    arrow(c.draw, [anchor(nodes["Rollback"], "bottom"), (1930, 780), anchor(nodes["Reward"], "right")], color="#b45309")
    arrow(c.draw, [anchor(nodes["Reward"], "left"), anchor(nodes["Continue"], "right")], color="#64748b", label="budget remains")
    # Loop from Continue back up to Observe.
    arrow(c.draw, [anchor(nodes["Continue"], "top"), (1105, 690), (790, 690), anchor(nodes["Observe"], "bottom")], color="#64748b")
    # Terminal reasons: pills stacked 50 px apart, each with an arrow into Done's left edge.
    reasons = ["safe resolution", "review escalation", "exploit detected", "timeout", "budget exhausted"]
    for i, reason in enumerate(reasons):
        y = 760 + i * 50
        c.pill(Rect(1735, y, 175, 36), reason, SLATE, size=16)
        arrow(c.draw, [(1910, y + 18), anchor(nodes["Done"], "left")], color="#2563eb", width=2)
    return c.save("episode_state_machine")
def deployment_topology() -> Path:
    """Render the deployment topology chart and return the PNG path.

    Four panels are drawn: the local machine, the product Space, the training
    Space, and the Hub. Boxes inside them are connected by deploy, upload and
    pull arrows.
    """
    c = Chart(2400, 1380, "Deployment Topology", "Local services, public product Space, private training Space, and artifact exchange on Hugging Face Hub.")
    # Panels: local (left), product above training (middle), hub (right).
    local = Rect(100, 245, 580, 830)
    product = Rect(810, 245, 600, 350)
    training = Rect(810, 725, 600, 350)
    hub = Rect(1540, 245, 760, 830)
    for rect, title, pal in [
        (local, "Local Developer Machine", BLUE),
        (product, "Public Product Space", TEAL),
        (training, "Private Training Space", VIOLET),
        (hub, "Hugging Face Hub", AMBER),
    ]:
        c.group(rect, title, pal)
    # Local panel boxes.
    repo = c.box(Rect(240, 365, 300, 86), "polyguard-rl repo", palette=BLUE, title_size=22)
    local_runtime = c.box(Rect(165, 545, 210, 82), "Local API", ":8200", palette=VIOLET, title_size=21)
    local_env = c.box(Rect(405, 545, 210, 82), "OpenEnv", ":8201", palette=AMBER, title_size=21)
    vite = c.box(Rect(165, 695, 210, 82), "Vite UI", ":5173", palette=BLUE, title_size=21)
    checks = c.box(Rect(405, 695, 210, 82), "Checks", "pytest / validate / gate", palette=SLATE, title_size=21)
    # Product Space boxes.
    space_bundle = c.box(Rect(955, 365, 310, 84), "Product Docker Bundle", palette=TEAL, title_size=22)
    product_runtime = c.box(Rect(890, 500, 205, 76), "FastAPI Runtime", palette=TEAL, title_size=19)
    product_ui = c.box(Rect(1135, 500, 205, 76), "React Workbench", palette=TEAL, title_size=19)
    # Training Space boxes.
    train_bundle = c.box(Rect(955, 845, 310, 84), "Training Docker Space", palette=VIOLET, title_size=22)
    runner = c.box(Rect(890, 980, 205, 76), "Gradio Runner", palette=VIOLET, title_size=19)
    gpu = c.box(Rect(1135, 980, 205, 76), "HF GPU A10G", palette=VIOLET, title_size=19)
    # Hub panel boxes.
    product_repo = c.box(Rect(1625, 360, 265, 86), "Product Space Repo", "polyguard-openenv", palette=AMBER, title_size=21)
    training_repo = c.box(Rect(1975, 360, 240, 86), "Training Space Repo", palette=AMBER, title_size=21)
    artifact_repo = c.box(Rect(1625, 610, 265, 86), "Artifact Repo", "adapters / reports", palette=AMBER, title_size=21)
    evidence_repo = c.box(Rect(1975, 610, 240, 86), "Evidence Space", palette=AMBER, title_size=21)
    docs = c.box(Rect(1780, 850, 275, 86), "Local docs/results", "pulled evidence", palette=SLATE, title_size=21)
    # The repo box fans out to all four local boxes.
    for target in [local_runtime, local_env, vite, checks]:
        arrow(c.draw, [anchor(repo, "bottom"), anchor(target, "top")], color="#2563eb")
    # Deploy arrows from the repo to each Space bundle; the training one bends at (745, 885).
    arrow(c.draw, [anchor(repo, "right"), anchor(space_bundle, "left")], color="#0f766e", label="deploy product")
    arrow(c.draw, [anchor(repo, "right"), (745, 885), anchor(train_bundle, "left")], color="#7c3aed", label="deploy training")
    arrow(c.draw, [anchor(space_bundle, "right"), anchor(product_repo, "left")], color="#0f766e")
    arrow(c.draw, [anchor(space_bundle, "bottom"), anchor(product_runtime, "top")], color="#0f766e")
    arrow(c.draw, [anchor(space_bundle, "bottom"), anchor(product_ui, "top")], color="#0f766e")
    arrow(c.draw, [anchor(train_bundle, "right"), anchor(training_repo, "left")], color="#7c3aed")
    arrow(c.draw, [anchor(train_bundle, "bottom"), anchor(runner, "top")], color="#7c3aed")
    arrow(c.draw, [anchor(runner, "right"), anchor(gpu, "left")], color="#7c3aed")
    # Artifact flow: runner -> artifact repo -> evidence repo and local docs.
    arrow(c.draw, [anchor(runner, "right"), anchor(artifact_repo, "left")], color="#b45309", label="upload")
    arrow(c.draw, [anchor(artifact_repo, "right"), anchor(evidence_repo, "left")], color="#b45309")
    arrow(c.draw, [anchor(artifact_repo, "bottom"), anchor(docs, "top")], color="#64748b", label="pull")
    return c.save("deployment_topology")
def evidence_generation_flow() -> Path:
    """Render the evidence generation flow chart and return the PNG path.

    Boxes are laid out left to right with no group panels: runs, their
    outputs, the artifact pull, three evaluation boxes, charts, and three
    final deliverable boxes.
    """
    c = Chart(2300, 980, "Evidence Generation Flow", "Training outputs are converted into reviewer-facing reports, plots, bundles, and README claims.")
    train = c.box(Rect(100, 435, 250, 96), "SFT / GRPO Runs", palette=VIOLET)
    reports = c.box(Rect(465, 320, 260, 90), "Run Reports", palette=AMBER)
    checkpoints = c.box(Rect(465, 560, 260, 90), "Adapters + Merged Artifacts", palette=AMBER, title_size=22)
    pull = c.box(Rect(850, 435, 260, 96), "Pull Training Artifacts", palette=BLUE)
    post = c.box(Rect(1250, 260, 290, 90), "Post-Save Inference", palette=TEAL)
    ablations = c.box(Rect(1250, 435, 290, 90), "Policy-Stack Ablations", palette=TEAL)
    benchmarks = c.box(Rect(1250, 610, 290, 90), "Benchmarks + Robustness", palette=TEAL)
    charts = c.box(Rect(1655, 435, 210, 90), "Charts", palette=ROSE)
    results = c.box(Rect(1955, 320, 260, 80), "docs/results", palette=SLATE, title_size=22)
    bundle = c.box(Rect(1955, 455, 260, 80), "Submission Bundle", palette=SLATE, title_size=22)
    readme = c.box(Rect(1955, 590, 260, 80), "README Claims", palette=SLATE, title_size=22)
    # Runs split into reports and checkpoints, which both converge on the pull box.
    arrow(c.draw, [anchor(train, "right"), anchor(reports, "left")], color="#b45309")
    arrow(c.draw, [anchor(train, "right"), anchor(checkpoints, "left")], color="#b45309")
    arrow(c.draw, [anchor(reports, "right"), anchor(pull, "left")], color="#2563eb")
    arrow(c.draw, [anchor(checkpoints, "right"), anchor(pull, "left")], color="#2563eb")
    # The pull box fans out to the three evaluation boxes; each one feeds the charts box.
    for target in [post, ablations, benchmarks]:
        arrow(c.draw, [anchor(pull, "right"), anchor(target, "left")], color="#0f766e")
        arrow(c.draw, [anchor(target, "right"), anchor(charts, "left")], color="#e11d48")
    # The charts box fans out to the three deliverables.
    arrow(c.draw, [anchor(charts, "right"), anchor(results, "left")], color="#64748b")
    arrow(c.draw, [anchor(charts, "right"), anchor(bundle, "left")], color="#64748b")
    arrow(c.draw, [anchor(charts, "right"), anchor(readme, "left")], color="#64748b")
    return c.save("evidence_generation_flow")
def frontend_runtime_surface() -> Path:
    """Render the frontend runtime surface chart and return the PNG path.

    Three column panels are drawn: React pages, API endpoint cards, and
    backend runtime boxes. Each page box is linked to the endpoint card at
    the same list position.
    """
    c = Chart(2300, 1350, "Frontend Runtime Surface", "React pages map to concrete FastAPI endpoints used by the Patient Workbench and supporting views.")
    pages_group = Rect(100, 245, 560, 965)
    api_group = Rect(780, 245, 720, 965)
    runtime_group = Rect(1640, 245, 560, 965)
    c.group(pages_group, "React App Pages", BLUE)
    c.group(api_group, "API Endpoints", TEAL)
    c.group(runtime_group, "Backend Runtime", VIOLET)
    # Left column: app box above a vertical chain of page boxes.
    app = c.box(Rect(230, 365, 300, 90), "React App", palette=BLUE)
    pages = [
        c.box(Rect(190, 525, 340, 78), "Patient Workbench", palette=BLUE, title_size=21),
        c.box(Rect(190, 665, 340, 78), "Policy Lab", palette=BLUE, title_size=21),
        c.box(Rect(190, 805, 340, 78), "Safety + Dosing Views", palette=BLUE, title_size=21),
        c.box(Rect(190, 945, 340, 78), "Replay + Training Views", palette=BLUE, title_size=21),
    ]
    c.pill(Rect(190, 1090, 340, 48), "shared fetchJson client", BLUE, size=18)
    arrow(c.draw, [anchor(app, "bottom"), anchor(pages[0], "top")], color="#2563eb", width=3)
    for a, b in zip(pages, pages[1:]):
        arrow(c.draw, [anchor(a, "bottom"), anchor(b, "top")], color="#2563eb", width=3)
    # Middle column: one card per endpoint family; body lines are newline-separated routes.
    endpoint_cards = [
        c.box(
            Rect(900, 485, 470, 116),
            "Session + Step",
            "POST /env/reset\nPOST /env/step_candidate",
            palette=TEAL,
            title_size=22,
            body_size=18,
        ),
        c.box(
            Rect(900, 655, 470, 132),
            "Policy + Safety",
            "POST /agents/orchestrate\nGET /env/reward_breakdown\nGET /policy/model_status",
            palette=TEAL,
            title_size=22,
            body_size=17,
        ),
        c.box(
            Rect(900, 850, 470, 116),
            "Evaluation",
            "POST /eval/run_baselines\nPOST /eval/run_dosing",
            palette=TEAL,
            title_size=22,
            body_size=18,
        ),
        c.box(
            Rect(900, 1035, 470, 116),
            "Trace + Metrics",
            "GET /env/trace\nGET /metrics/training",
            palette=TEAL,
            title_size=22,
            body_size=18,
        ),
    ]
    # Pair pages with endpoint cards by position (first with first, and so on).
    for page, endpoint in zip(pages, endpoint_cards):
        arrow(c.draw, [anchor(page, "right"), anchor(endpoint, "left")], color="#0f766e", width=3)
    c.pill(Rect(945, 1170, 380, 48), "all calls use API_BASE", TEAL, size=18)
    # Right column: vertical chain of backend boxes.
    api = c.box(Rect(1785, 395, 270, 96), "FastAPI API", palette=VIOLET)
    env = c.box(Rect(1785, 610, 270, 96), "PolyGuardEnv", palette=AMBER)
    policy = c.box(Rect(1785, 825, 270, 96), "Policy Runtime", palette=MINT)
    evals = c.box(Rect(1785, 1040, 270, 96), "Eval + Metrics", palette=SLATE)
    # This arrow starts at the midpoint of the API panel's right edge, not at a specific card.
    arrow(c.draw, [anchor(api_group, "right"), anchor(api, "left")], color="#7c3aed", width=4, label="fetchJson")
    arrow(c.draw, [anchor(api, "bottom"), anchor(env, "top")], color="#b45309")
    arrow(c.draw, [anchor(env, "bottom"), anchor(policy, "top")], color="#0891b2")
    arrow(c.draw, [anchor(policy, "bottom"), anchor(evals, "top")], color="#64748b")
    return c.save("frontend_runtime_surface")
# Diagram functions in the order main() runs them; each takes no arguments
# and returns the path of the PNG it saved.
RENDERERS = [
    system_architecture,
    runtime_step_flow,
    data_training_pipeline,
    multi_agent_orchestration,
    reward_decomposition,
    episode_state_machine,
    deployment_topology,
    evidence_generation_flow,
    frontend_runtime_surface,
]
def main() -> None:
    """Render every diagram in ``RENDERERS`` and print the resulting paths.

    Stale PNGs in ``OUT_DIR`` are removed only after all renderers succeed,
    so a failure part-way through does not wipe the previously generated
    diagrams. On success the directory holds exactly the freshly rendered
    files, as it did before this change.
    """
    OUT_DIR.mkdir(parents=True, exist_ok=True)
    rendered = [renderer() for renderer in RENDERERS]

    # Prune PNGs that this run did not produce (e.g. diagrams since renamed or removed).
    fresh = set(rendered)
    for existing in OUT_DIR.glob("*.png"):
        if existing not in fresh:
            existing.unlink()

    print("rendered_diagrams:")
    for path in rendered:
        print(path.relative_to(ROOT))


if __name__ == "__main__":
    main()