| |
| """Render polished PolyGuard architecture diagrams as individual PNG charts.""" |
|
|
| from __future__ import annotations |
|
|
| import math |
| import textwrap |
| from dataclasses import dataclass |
| from pathlib import Path |
| from typing import Iterable |
|
|
| from PIL import Image, ImageDraw, ImageFont |
|
|
|
|
# Two levels above this file: the directory that contains this file's folder.
ROOT = Path(__file__).resolve().parents[1]
# Every chart saved by Chart.save() lands in this directory.
OUT_DIR = ROOT / "docs" / "assets" / "diagrams"


# NOTE(review): these are macOS system font locations -- presumably the script
# is only run on macOS; confirm before relying on it elsewhere.
FONT_REGULAR = "/System/Library/Fonts/Supplemental/Arial.ttf"
FONT_BOLD = "/System/Library/Fonts/Supplemental/Arial Bold.ttf"


# Single colours used directly by the drawing helpers.
BG = "#f6f8fb"  # canvas background (Chart.__init__)
INK = "#172033"  # default text colour
MUTED = "#64748b"  # subtitle / body text colour
LINE = "#718096"  # default arrow colour
WHITE = "#ffffff"  # box fill and arrow-label background


# Palettes are (fill, accent) pairs; Chart.group/box/pill unpack them in that order.
BLUE = ("#eaf3ff", "#2563eb")
VIOLET = ("#f3edff", "#7c3aed")
TEAL = ("#e8f8f2", "#0f766e")
AMBER = ("#fff5df", "#b45309")
ROSE = ("#fff1f3", "#e11d48")
SLATE = ("#eef2f7", "#475569")
MINT = ("#e7f7fb", "#0891b2")
|
|
|
|
@dataclass(frozen=True)
class Rect:
    """Immutable rectangle described by its top-left corner and its size.

    The drawing helpers in this file treat ``x``/``y`` as the top-left corner
    and derive the opposite corner as ``(x + w, y + h)``.
    """

    # Horizontal position of the left edge.
    x: int
    # Vertical position of the top edge.
    y: int
    # Width, measured to the right of ``x``.
    w: int
    # Height, measured downwards from ``y``.
    h: int
|
|
|
|
# Fonts already loaded, keyed by (size, bold), so repeated calls made while
# drawing a chart do not re-read the font file from disk each time.
_FONT_CACHE: dict = {}

# Font files tried, in order, when the configured path cannot be loaded
# (for example when the script runs on a machine without the macOS fonts).
_FALLBACK_FONTS = {
    False: ("DejaVuSans.ttf", "LiberationSans-Regular.ttf"),
    True: ("DejaVuSans-Bold.ttf", "LiberationSans-Bold.ttf"),
}


def font(size: int, bold: bool = False) -> ImageFont.FreeTypeFont:
    """Return a TrueType font of the requested size.

    Args:
        size: Font size passed to ``ImageFont.truetype``.
        bold: Use ``FONT_BOLD`` instead of ``FONT_REGULAR`` when true.

    Returns:
        The loaded font. The same object is returned for repeated requests
        with the same ``(size, bold)`` pair.

    Raises:
        OSError: If neither the configured font nor any fallback can be loaded.
    """
    key = (size, bool(bold))
    cached = _FONT_CACHE.get(key)
    if cached is not None:
        return cached

    preferred = FONT_BOLD if bold else FONT_REGULAR
    candidates = (preferred, *_FALLBACK_FONTS[bool(bold)])
    last_error = None
    for candidate in candidates:
        try:
            loaded = ImageFont.truetype(candidate, size)
        except OSError as err:
            # Remember the failure and try the next candidate font file.
            last_error = err
            continue
        _FONT_CACHE[key] = loaded
        return loaded
    raise OSError(
        f"no usable TrueType font found; tried: {', '.join(candidates)}"
    ) from last_error
|
|
|
|
def text_size(draw: ImageDraw.ImageDraw, text: str, fnt: ImageFont.FreeTypeFont) -> tuple[int, int]:
    """Measure ``text`` rendered with ``fnt`` and return ``(width, height)``.

    The size is taken from the bounding box reported by ``draw.textbbox`` for
    the text anchored at the origin. Empty text measures as ``(0, 0)`` without
    consulting the drawing context.
    """
    if text:
        left, top, right, bottom = draw.textbbox((0, 0), text, font=fnt)
        return right - left, bottom - top
    return 0, 0
|
|
|
|
def wrap_lines(draw: ImageDraw.ImageDraw, text: str, fnt: ImageFont.FreeTypeFont, max_width: int) -> list[str]:
    """Greedily wrap ``text`` so each line fits within ``max_width``.

    Explicit newlines in ``text`` are kept as paragraph breaks, and blank
    paragraphs become empty lines. Words are packed onto a line for as long as
    the measured width (via ``text_size``) stays within ``max_width``.

    A single word that is wider than ``max_width`` on its own is hard-split
    with ``textwrap.wrap`` using a character budget estimated from the font
    size. This now happens wherever the word occurs; previously it only
    happened when the word was the first one on a line, so an over-wide word
    following other words was emitted unbroken.

    Args:
        draw: Drawing context used for measuring.
        text: Text to wrap; may contain ``"\\n"``.
        fnt: Font used for measuring; its ``size`` is read only when a word
            has to be hard-split.
        max_width: Maximum line width, in the units ``text_size`` reports.

    Returns:
        The wrapped lines, in order.
    """
    lines: list[str] = []
    for part in text.split("\n"):
        if not part.strip():
            lines.append("")
            continue
        current = ""
        for word in part.split():
            candidate = word if not current else f"{current} {word}"
            if text_size(draw, candidate, fnt)[0] <= max_width:
                current = candidate
                continue
            # The candidate overflows: flush what has been collected so far.
            if current:
                lines.append(current)
                if text_size(draw, word, fnt)[0] <= max_width:
                    current = word
                    continue
            # The word alone is too wide: split it into character chunks and
            # keep the last chunk open so following words can still join it.
            chunks = textwrap.wrap(word, width=max(8, max_width // max(1, fnt.size)))
            lines.extend(chunks[:-1])
            current = chunks[-1] if chunks else word
        if current:
            lines.append(current)
    return lines
|
|
|
|
def draw_centered_lines(
    draw: ImageDraw.ImageDraw,
    lines: Iterable[str],
    fnt: ImageFont.FreeTypeFont,
    x: int,
    y: int,
    w: int,
    fill: str = INK,
    line_gap: int = 7,
) -> int:
    """Draw ``lines`` top to bottom, each centred in the span ``[x, x + w]``.

    Args:
        draw: Drawing context to paint on.
        lines: Lines of text, drawn in iteration order.
        fnt: Font used for both measuring and drawing.
        x: Left edge of the span the text is centred in.
        y: Top position of the first line.
        w: Width of the span the text is centred in.
        fill: Text colour.
        line_gap: Extra vertical space added after every line.

    Returns:
        The vertical position just below the last line, including its gap.
    """
    cursor = y
    for row in lines:
        row_w, row_h = text_size(draw, row, fnt)
        # Split the leftover horizontal space evenly on both sides.
        left = x + (w - row_w) / 2
        draw.text((left, cursor), row, font=fnt, fill=fill)
        cursor += row_h + line_gap
    return cursor
|
|
|
|
def rounded(
    draw: ImageDraw.ImageDraw,
    rect: Rect,
    fill: str,
    outline: str = "#d5deea",
    width: int = 2,
    radius: int = 22,
    shadow: bool = True,
) -> None:
    """Paint a rounded rectangle covering ``rect``, optionally with a shadow.

    Args:
        draw: Drawing context to paint on.
        rect: Area of the rectangle.
        fill: Interior colour.
        outline: Border colour.
        width: Border thickness.
        radius: Corner radius, also used for the shadow.
        shadow: When true, a flat shadow offset by (8, 10) is painted first
            so the rectangle itself is drawn over it.
    """
    left, top = rect.x, rect.y
    right, bottom = left + rect.w, top + rect.h
    if shadow:
        draw.rounded_rectangle(
            (left + 8, top + 10, right + 8, bottom + 10),
            radius=radius,
            fill="#dfe6ef",
        )
    draw.rounded_rectangle(
        (left, top, right, bottom),
        radius=radius,
        fill=fill,
        outline=outline,
        width=width,
    )
|
|
|
|
def anchor(rect: Rect, side: str) -> tuple[int, int]:
    """Return the connection point of ``rect`` on the given ``side``.

    ``"top"``, ``"bottom"``, ``"left"`` and ``"right"`` give the midpoint of
    that edge; any other value gives the centre of the rectangle. Midpoints
    use integer division of the width/height.
    """
    centre_x = rect.x + rect.w // 2
    centre_y = rect.y + rect.h // 2
    if side in ("top", "bottom"):
        return centre_x, (rect.y if side == "top" else rect.y + rect.h)
    if side in ("left", "right"):
        return (rect.x if side == "left" else rect.x + rect.w), centre_y
    return centre_x, centre_y
|
|
|
|
def arrow(
    draw: ImageDraw.ImageDraw,
    points: list[tuple[int, int]],
    color: str = LINE,
    width: int = 4,
    label: str | None = None,
    label_offset: tuple[int, int] = (0, -26),
) -> None:
    """Draw a polyline through ``points`` ending in an arrowhead.

    Args:
        draw: Drawing context to paint on.
        points: Vertices of the line; the arrowhead sits on the last one and
            points along the final segment. With fewer than two points only
            the line call is made.
        color: Colour of the line, the arrowhead and the label text.
        width: Line thickness.
        label: Optional caption drawn on a white rounded badge.
        label_offset: ``(dx, dy)`` added to the midpoint of the final segment
            to position the caption.
    """
    draw.line(points, fill=color, width=width, joint="curve")
    if len(points) < 2:
        return

    (tail_x, tail_y), (tip_x, tip_y) = points[-2], points[-1]
    heading = math.atan2(tip_y - tail_y, tip_x - tail_x)
    head_len = 17
    spread = math.pi / 7
    # Two barbs, one on each side of the direction of travel.
    barbs = [
        (
            tip_x - head_len * math.cos(heading + delta),
            tip_y - head_len * math.sin(heading + delta),
        )
        for delta in (-spread, spread)
    ]
    draw.polygon([(tip_x, tip_y), *barbs], fill=color)

    if not label:
        return

    # The caption is positioned relative to the last segment's midpoint.
    mid_x = (tail_x + tip_x) // 2 + label_offset[0]
    mid_y = (tail_y + tip_y) // 2 + label_offset[1]
    label_font = font(22, bold=True)
    text_w, text_h = text_size(draw, label, label_font)
    pad_x, pad_y = 12, 6
    half_w = text_w / 2
    draw.rounded_rectangle(
        (mid_x - half_w - pad_x, mid_y - pad_y, mid_x + half_w + pad_x, mid_y + text_h + pad_y),
        radius=12,
        fill=WHITE,
        outline="#dbe3ee",
    )
    draw.text((mid_x - half_w, mid_y), label, font=label_font, fill=color)
|
|
|
|
class Chart:
    """One diagram canvas together with the shapes the renderers compose.

    Constructing a chart creates an RGB image filled with ``BG`` and paints
    the heading straight away. ``group``, ``box`` and ``pill`` add shapes to
    the same image, and ``save`` writes it as a PNG inside ``OUT_DIR``.
    """

    def __init__(self, width: int, height: int, title: str, subtitle: str = "") -> None:
        """Create a ``width`` x ``height`` canvas and draw its heading."""
        self.width = width
        self.height = height
        canvas = Image.new("RGB", (width, height), BG)
        self.image = canvas
        self.draw = ImageDraw.Draw(canvas)
        self.title(title, subtitle)

    def title(self, title: str, subtitle: str = "") -> None:
        """Paint the top colour strip, the chart title and an optional subtitle."""
        pen = self.draw
        pen.rectangle((0, 0, self.width, 14), fill="#1d4ed8")
        heading_font = font(58, bold=True)
        pen.text((76, 54), title, font=heading_font, fill=INK)
        if not subtitle:
            return
        pen.text((78, 126), subtitle, font=font(26), fill=MUTED)

    def group(self, rect: Rect, title: str, palette: tuple[str, str]) -> None:
        """Draw a titled panel: a filled area with an accent-coloured header bar."""
        panel_fill, accent = palette
        pen = self.draw
        left, top = rect.x, rect.y
        right = left + rect.w
        header_bottom = top + 64
        rounded(pen, rect, fill=panel_fill, outline="#cbd5e1", width=2, radius=30, shadow=False)
        # Rounded header first, then a plain strip over its lower part so only
        # the top corners of the header stay rounded.
        pen.rounded_rectangle((left, top, right, header_bottom), radius=30, fill=accent)
        pen.rectangle((left, top + 34, right, header_bottom), fill=accent)
        pen.text((left + 26, top + 18), title, font=font(26, bold=True), fill=WHITE)

    def box(
        self,
        rect: Rect,
        title: str,
        body: str = "",
        palette: tuple[str, str] = SLATE,
        title_size: int = 25,
        body_size: int = 20,
        center: bool = True,
    ) -> Rect:
        """Draw a white card with an accent stripe, a bold title and optional body.

        Args:
            rect: Area of the card.
            title: Bold heading, wrapped to the card's inner width.
            body: Optional secondary text shown below the title.
            palette: ``(fill, accent)`` pair; only the accent is used here.
            title_size: Font size of the title.
            body_size: Font size of the body.
            center: When true the text block is centred in the card,
                otherwise it is left-aligned near the top.

        Returns:
            ``rect`` unchanged, so callers can keep it for anchoring arrows.
        """
        _, accent = palette
        pen = self.draw
        rounded(pen, rect, fill=WHITE, outline="#cbd5e1", width=2, radius=20, shadow=True)
        pen.rounded_rectangle((rect.x, rect.y, rect.x + 10, rect.y + rect.h), radius=20, fill=accent)

        heading_font = font(title_size, bold=True)
        detail_font = font(body_size)
        inner_w = rect.w - 46
        heading = wrap_lines(pen, title, heading_font, inner_w)
        detail = wrap_lines(pen, body, detail_font, inner_w) if body else []

        def stacked_height(rows: list[str], fnt: ImageFont.FreeTypeFont, spacing: int) -> int:
            # Sum of the measured line heights plus the gaps between lines.
            return sum(text_size(pen, row, fnt)[1] for row in rows) + max(0, len(rows) - 1) * spacing

        heading_h = stacked_height(heading, heading_font, 7)
        detail_h = stacked_height(detail, detail_font, 6)
        gap = 10 if detail else 0

        if not center:
            cursor = rect.y + 20
            pen.multiline_text((rect.x + 28, cursor), "\n".join(heading), font=heading_font, fill=INK, spacing=7)
            cursor += heading_h + gap
            if detail:
                pen.multiline_text((rect.x + 28, cursor), "\n".join(detail), font=detail_font, fill=MUTED, spacing=6)
            return rect

        # Centre the text block vertically but keep at least 18 units of top padding.
        top = rect.y + max(18, (rect.h - (heading_h + detail_h + gap)) // 2)
        cursor = draw_centered_lines(pen, heading, heading_font, rect.x + 22, top, inner_w, INK)
        if detail:
            cursor += gap
            draw_centered_lines(pen, detail, detail_font, rect.x + 22, cursor, inner_w, MUTED, line_gap=6)
        return rect

    def pill(self, rect: Rect, text: str, palette: tuple[str, str], size: int = 21) -> Rect:
        """Draw a capsule-shaped label with centred bold text and return ``rect``."""
        capsule_fill, accent = palette
        pen = self.draw
        pen.rounded_rectangle(
            (rect.x, rect.y, rect.x + rect.w, rect.y + rect.h),
            radius=rect.h // 2,
            fill=capsule_fill,
            outline=accent,
            width=2,
        )
        label_font = font(size, bold=True)
        inner_w = rect.w - 28
        rows = wrap_lines(pen, text, label_font, inner_w)
        # Height is estimated from the font size rather than measured.
        estimated_h = len(rows) * (size + 6)
        top = rect.y + (rect.h - estimated_h) // 2
        draw_centered_lines(pen, rows, label_font, rect.x + 14, top, inner_w, INK, line_gap=4)
        return rect

    def save(self, name: str) -> Path:
        """Write the chart to ``OUT_DIR/<name>.png`` and return that path."""
        OUT_DIR.mkdir(parents=True, exist_ok=True)
        target = OUT_DIR / f"{name}.png"
        self.image.save(target, quality=96)
        return target
|
|
|
|
def system_architecture() -> Path:
    """Render the system architecture chart and return the saved PNG path.

    The chart is a 2400 x 1500 canvas with five titled groups stacked top to
    bottom (the last two side by side), boxes inside each group, and arrows
    between boxes and between groups. Shapes are painted in call order onto
    one image, so later shapes cover earlier ones where they overlap.
    """
    c = Chart(
        2400,
        1500,
        "PolyGuard System Architecture",
        "Research environment, policy stack, OpenEnv runtime, model artifacts, and evidence outputs.",
    )
    # Group panels: three full-width rows, then two half-width panels.
    clients = Rect(110, 190, 2180, 190)
    api = Rect(110, 440, 2180, 190)
    agents = Rect(110, 690, 2180, 260)
    runtime = Rect(110, 1015, 1050, 300)
    assets = Rect(1240, 1015, 1050, 300)
    for rect, title, pal in [
        (clients, "User And Integration Surfaces", BLUE),
        (api, "API And OpenEnv Surface", VIOLET),
        (agents, "Multi-Agent Policy Stack", TEAL),
        (runtime, "OpenEnv Runtime And Rewards", AMBER),
        (assets, "Data, Models, And Evidence Outputs", MINT),
    ]:
        c.group(rect, title, pal)

    # client_boxes is not referenced again; the boxes are drawn as a side effect.
    client_boxes = [
        c.box(Rect(185, 275, 390, 78), "React Patient Workbench", palette=BLUE),
        c.box(Rect(665, 275, 350, 78), "Public HF Space", palette=BLUE),
        c.box(Rect(1105, 275, 380, 78), "One-Run Notebook / CLI", palette=BLUE),
        c.box(Rect(1575, 275, 365, 78), "OpenEnv Validator", palette=BLUE),
    ]
    api_boxes = [
        c.box(Rect(260, 530, 330, 72), "app/api/routes.py", palette=VIOLET, title_size=21),
        c.box(Rect(720, 530, 300, 72), "APIService", palette=VIOLET, title_size=21),
        c.box(Rect(1180, 530, 370, 72), "PolicyProviderRouter", palette=VIOLET, title_size=21),
        c.box(Rect(1680, 530, 350, 72), "app/env/fastapi_app.py", palette=VIOLET, title_size=20),
    ]
    # Two rows of agent boxes, spaced 405 apart horizontally.
    top_agents = [
        c.box(Rect(215 + i * 405, 780, 285, 60), name, palette=TEAL, title_size=19)
        for i, name in enumerate(["MedRec", "Evidence", "Graph Safety", "Dosing", "Candidate"])
    ]
    # NOTE(review): the last box here ends at 420 + 4 * 405 + 285 = 2325, past
    # the agents group's right edge at 110 + 2180 = 2290 -- confirm intended.
    bottom_agents = [
        c.box(Rect(420 + i * 405, 865, 285, 60), name, palette=TEAL if name != "Contextual Bandit" else AMBER, title_size=19)
        for i, name in enumerate(["Supervisor", "Planner", "Contextual Bandit", "Critic", "Explainer"])
    ]
    runtime_boxes = [
        c.box(Rect(185, 1110, 300, 78), "PolyGuardEnv", "stateful reset / step", palette=AMBER, title_size=22),
        c.box(Rect(545, 1110, 240, 78), "Verifier", "legality gates", palette=AMBER, title_size=21),
        c.box(Rect(845, 1110, 240, 78), "Reward Router", "13 components", palette=AMBER, title_size=21),
        c.box(Rect(345, 1215, 240, 66), "Transition", palette=AMBER, title_size=20),
        c.box(Rect(645, 1215, 240, 66), "Anti-Cheat", palette=AMBER, title_size=20),
    ]
    asset_boxes = [
        c.box(Rect(1305, 1100, 215, 68), "Scenarios", palette=MINT, title_size=20),
        c.box(Rect(1560, 1100, 250, 68), "Drug Knowledge", palette=MINT, title_size=20),
        c.box(Rect(1850, 1100, 250, 68), "Active Qwen", "adapter / merged", palette=MINT, title_size=20),
        c.box(Rect(1305, 1215, 215, 68), "Retrieval Index", palette=MINT, title_size=20),
        c.box(Rect(1560, 1215, 250, 68), "Evaluation Suites", palette=MINT, title_size=20),
        c.box(Rect(1850, 1215, 250, 68), "docs/results", "charts + reports", palette=MINT, title_size=20),
    ]

    # Left-to-right chains inside each row: every box points at its right neighbour.
    for a, b in zip(api_boxes, api_boxes[1:]):
        arrow(c.draw, [anchor(a, "right"), anchor(b, "left")], color="#7c3aed")
    for a, b in zip(top_agents, top_agents[1:]):
        arrow(c.draw, [anchor(a, "right"), anchor(b, "left")], color="#0f766e", width=3)
    # Drawn after both agent rows' boxes and before the bottom row's arrows.
    c.pill(Rect(990, 845, 420, 44), "routed planning and critique", TEAL, size=18)
    for a, b in zip(bottom_agents, bottom_agents[1:]):
        arrow(c.draw, [anchor(a, "right"), anchor(b, "left")], color="#0f766e", width=3)
    # Runtime group: horizontal chain on the first row, plus two downward links.
    arrow(c.draw, [anchor(runtime_boxes[0], "right"), anchor(runtime_boxes[1], "left")], color="#b45309")
    arrow(c.draw, [anchor(runtime_boxes[1], "right"), anchor(runtime_boxes[2], "left")], color="#b45309")
    arrow(c.draw, [anchor(runtime_boxes[0], "bottom"), anchor(runtime_boxes[3], "top")], color="#b45309")
    arrow(c.draw, [anchor(runtime_boxes[2], "bottom"), anchor(runtime_boxes[4], "top")], color="#b45309")
    # Assets group: one chain per row.
    arrow(c.draw, [anchor(asset_boxes[0], "right"), anchor(asset_boxes[1], "left")], color="#0891b2")
    arrow(c.draw, [anchor(asset_boxes[1], "right"), anchor(asset_boxes[2], "left")], color="#0891b2")
    arrow(c.draw, [anchor(asset_boxes[3], "right"), anchor(asset_boxes[4], "left")], color="#475569")
    arrow(c.draw, [anchor(asset_boxes[4], "right"), anchor(asset_boxes[5], "left")], color="#475569")
    # Labelled arrows bridging the gaps between groups (fixed coordinates).
    arrow(c.draw, [(1200, 380), (1200, 440)], color="#3b82f6", label="requests")
    arrow(c.draw, [(1200, 630), (1200, 690)], color="#7c3aed", label="orchestrates")
    arrow(c.draw, [(760, 950), (760, 1015)], color="#0f766e", label="safe action")
    arrow(c.draw, [(1725, 950), (1725, 1015)], color="#0891b2", label="model + evidence")
    arrow(c.draw, [(1160, 1165), (1240, 1165)], color="#64748b", label="reports")
    return c.save("system_architecture")
|
|
|
|
def runtime_step_flow() -> Path:
    """Render the runtime step flow chart and return the saved PNG path.

    The layout is sequence-diagram style: one header box per actor across the
    top, a vertical lifeline under each, and labelled horizontal arrows
    between lifelines at fixed heights.
    """
    c = Chart(2400, 1320, "Runtime Step Flow", "How one reset or action moves through UI, API, policy, environment, and reward scoring.")
    # (actor name, left x of its header box, palette)
    actors = [
        ("User", 130, BLUE),
        ("React Workbench", 430, BLUE),
        ("FastAPI APIService", 760, VIOLET),
        ("Orchestrator", 1100, TEAL),
        ("PolyGuardEnv", 1440, AMBER),
        ("Policy Provider", 1780, MINT),
        ("Reward Router", 2090, ROSE),
    ]
    # Actor name -> x coordinate of its lifeline (horizontal centre of the header box).
    x_positions: dict[str, int] = {}
    for name, x, pal in actors:
        rect = c.box(Rect(x, 210, 220, 82), name, palette=pal, title_size=22)
        x_positions[name] = rect.x + rect.w // 2
        c.draw.line((x_positions[name], 315, x_positions[name], 1185), fill="#d0d9e6", width=3)

    def msg(y: int, src: str, dst: str, label: str, color: str = LINE) -> None:
        """Draw a labelled horizontal arrow at height ``y`` from ``src``'s lifeline to ``dst``'s."""
        sx, dx = x_positions[src], x_positions[dst]
        arrow(c.draw, [(sx, y), (dx, y)], color=color, width=4, label=label, label_offset=(0, -34))

    msg(390, "User", "React Workbench", "reset / run")
    msg(500, "React Workbench", "FastAPI APIService", "POST /env/reset")
    msg(610, "FastAPI APIService", "PolyGuardEnv", "reset(seed, task)", "#b45309")
    msg(720, "PolyGuardEnv", "FastAPI APIService", "observation + candidates", "#b45309")
    msg(830, "React Workbench", "FastAPI APIService", "step_candidate or orchestrate")
    msg(940, "FastAPI APIService", "Orchestrator", "agent path", "#0f766e")
    msg(1050, "Orchestrator", "Policy Provider", "optional Qwen selection", "#0891b2")
    msg(1160, "Orchestrator", "PolyGuardEnv", "final action", "#0f766e")
    # NOTE(review): this row sits at y=1020, above the 1050 and 1160 rows drawn
    # before it -- confirm the vertical ordering is intended.
    msg(1020, "PolyGuardEnv", "Reward Router", "13 components -> 4 channels", "#e11d48")
    # NOTE(review): the response card starts at y=1160 and spans x 1290-1720, so
    # it is painted over part of the "final action" arrow drawn at y=1160 -- confirm.
    c.box(Rect(1290, 1160, 430, 90), "Response", "observation, reward, done, trace, info", palette=SLATE)
    # 1440 + 110 is the PolyGuardEnv lifeline x (box left edge plus half its width).
    arrow(c.draw, [(1440 + 110, 1120), (1505, 1160)], color="#64748b")
    # Return path from the response card; the label is placed on the final (vertical) segment.
    arrow(c.draw, [(1290, 1205), (650, 1205), (650, 900)], color="#64748b", label="render updated panels", label_offset=(0, 16))
    return c.save("runtime_step_flow")
|
|
|
|
def data_training_pipeline() -> Path:
    """Render the data and training pipeline chart and return the saved PNG path.

    Four column groups are drawn left to right, each holding a vertical stack
    of boxes; arrows chain the boxes inside a column and link one column to
    the next.
    """
    c = Chart(2400, 1320, "Data And Training Pipeline", "From local knowledge and synthetic cases to SFT, GRPO, activation, and inference.")
    groups = [
        (Rect(90, 220, 430, 880), "Sources", BLUE),
        (Rect(610, 220, 520, 880), "DataOps", TEAL),
        (Rect(1220, 220, 520, 880), "Post-Training", VIOLET),
        (Rect(1830, 220, 480, 880), "Validation And Use", AMBER),
    ]
    for rect, title, pal in groups:
        c.group(rect, title, pal)
    sources = [
        c.box(Rect(150, 325, 310, 76), "Local drug knowledge", palette=BLUE, title_size=21),
        c.box(Rect(150, 435, 310, 76), "Synthetic patients", palette=BLUE, title_size=21),
        c.box(Rect(150, 545, 310, 76), "Scenario files", "easy / medium / hard", palette=BLUE, title_size=21),
        c.box(Rect(150, 655, 310, 76), "Optional HF data", palette=BLUE, title_size=21),
        c.box(Rect(150, 765, 310, 76), "DDI API", "optional", palette=BLUE, title_size=21),
        c.box(Rect(150, 875, 310, 76), "Web fallback", "optional", palette=BLUE, title_size=21),
    ]
    dataops = [
        c.box(Rect(700, 330, 340, 78), "Normalize drugs", palette=TEAL, title_size=22),
        c.box(Rect(700, 465, 340, 78), "Build knowledge graph", palette=TEAL, title_size=22),
        c.box(Rect(700, 600, 340, 78), "Build retrieval index", palette=TEAL, title_size=22),
        c.box(Rect(700, 735, 340, 78), "Build scenarios", palette=TEAL, title_size=22),
        c.box(Rect(700, 870, 340, 90), "Build SFT / GRPO corpus", palette=TEAL, title_size=22),
    ]
    training = [
        c.box(Rect(1310, 345, 340, 86), "TRL SFT adapter", palette=VIOLET, title_size=22),
        c.box(Rect(1310, 505, 340, 86), "TRL GRPO", "environment reward", palette=VIOLET, title_size=22),
        c.box(Rect(1310, 665, 340, 86), "Merge / export adapters", palette=VIOLET, title_size=22),
        c.box(Rect(1310, 825, 340, 86), "Registry + manifests", palette=VIOLET, title_size=22),
    ]
    validation = [
        c.box(Rect(1905, 345, 310, 86), "Post-save inference", palette=AMBER, title_size=22),
        c.box(Rect(1905, 505, 310, 86), "Activate model", palette=AMBER, title_size=22),
        c.box(Rect(1905, 665, 310, 86), "/policy/model_status", palette=AMBER, title_size=21),
        c.box(Rect(1905, 825, 310, 86), "/policy/infer", palette=AMBER, title_size=21),
    ]
    # Every source box points at the last DataOps box (the corpus builder).
    for src in sources:
        arrow(c.draw, [anchor(src, "right"), anchor(dataops[-1], "left")], color="#3b82f6", width=3)
    # Top-to-bottom chain inside each column, then a labelled hop to the next column.
    for a, b in zip(dataops, dataops[1:]):
        arrow(c.draw, [anchor(a, "bottom"), anchor(b, "top")], color="#0f766e")
    arrow(c.draw, [anchor(dataops[-1], "right"), anchor(training[0], "left")], color="#7c3aed", label="corpus")
    for a, b in zip(training, training[1:]):
        arrow(c.draw, [anchor(a, "bottom"), anchor(b, "top")], color="#7c3aed")
    arrow(c.draw, [anchor(training[-1], "right"), anchor(validation[0], "left")], color="#b45309", label="artifact")
    for a, b in zip(validation, validation[1:]):
        arrow(c.draw, [anchor(a, "bottom"), anchor(b, "top")], color="#b45309")
    return c.save("data_training_pipeline")
|
|
|
|
def multi_agent_orchestration() -> Path:
    """Render the multi-agent orchestration chart and return the saved PNG path.

    Three groups are drawn side by side: a vertical chain of candidate
    construction boxes, a policy-control cluster with a veto loop, and a
    step-closure chain whose last box is routed back to the bandit box.
    """
    c = Chart(2400, 1250, "Multi-Agent Orchestration", "Specialized agents build a verified candidate, route it through policy control, then close the loop with reward feedback.")
    input_group = Rect(90, 250, 560, 850)
    decision_group = Rect(760, 250, 780, 850)
    closure_group = Rect(1650, 250, 660, 850)
    for rect, title, pal in [
        (input_group, "Candidate Construction", BLUE),
        (decision_group, "Policy Control", VIOLET),
        (closure_group, "Step Closure", AMBER),
    ]:
        c.group(rect, title, pal)

    # Left column: a single top-to-bottom chain.
    inputs = [
        c.box(Rect(210, 360, 320, 72), "State", palette=BLUE, title_size=22),
        c.box(Rect(210, 465, 320, 72), "MedRec", palette=TEAL, title_size=22),
        c.box(Rect(210, 570, 320, 72), "Evidence", palette=TEAL, title_size=22),
        c.box(Rect(210, 675, 320, 72), "Graph Safety", palette=TEAL, title_size=22),
        c.box(Rect(210, 780, 320, 72), "Dosing", palette=TEAL, title_size=22),
        c.box(Rect(210, 885, 320, 82), "Candidate", "legal action set", palette=TEAL, title_size=22),
    ]
    for a, b in zip(inputs, inputs[1:]):
        arrow(c.draw, [anchor(a, "bottom"), anchor(b, "top")], color="#0f766e", width=3)

    # Middle column: policy-control boxes.
    supervisor = c.box(Rect(1020, 360, 260, 82), "Supervisor", "routes context", palette=TEAL, title_size=22)
    bandit = c.box(Rect(850, 525, 250, 82), "Bandit Top-K", "policy shortlist", palette=AMBER, title_size=22)
    planner = c.box(Rect(1190, 525, 250, 82), "Planner", "drafts action", palette=VIOLET, title_size=22)
    critic = c.box(Rect(1190, 700, 250, 82), "Critic", "checks action", palette=ROSE, title_size=22)
    replan = c.box(Rect(850, 700, 250, 82), "Review / Replan", "on veto", palette=ROSE, title_size=22)
    c.pill(Rect(850, 915, 590, 74), "coordination: supervisor routing | veto loop | lightweight debate", SLATE, size=21)

    # Right column: step-closure boxes.
    env_step = c.box(Rect(1855, 370, 250, 84), "Env Step", "apply transition", palette=AMBER, title_size=22)
    explainer = c.box(Rect(1855, 540, 250, 84), "Explainer", "grounded rationale", palette=TEAL, title_size=22)
    reward = c.box(Rect(1855, 710, 250, 84), "Reward + Trace", "step feedback", palette=SLATE, title_size=22)
    update = c.box(Rect(1855, 880, 250, 84), "Bandit Update", "learn from reward", palette=AMBER, title_size=22)

    # Candidate box -> supervisor, routed through the gap between the first two groups.
    arrow(c.draw, [anchor(inputs[-1], "right"), (705, 926), (705, 401), anchor(supervisor, "left")], color="#2563eb", label="candidate")
    # Supervisor fans out to bandit and planner through a shared point at (1150, 485).
    arrow(c.draw, [anchor(supervisor, "bottom"), (1150, 485), anchor(bandit, "top")], color="#b45309")
    arrow(c.draw, [anchor(supervisor, "bottom"), (1150, 485), anchor(planner, "top")], color="#7c3aed")
    arrow(c.draw, [anchor(bandit, "right"), anchor(planner, "left")], color="#b45309")
    arrow(c.draw, [anchor(planner, "bottom"), anchor(critic, "top")], color="#7c3aed")
    # Veto loop: critic -> replan -> back to the planner.
    arrow(c.draw, [anchor(critic, "left"), anchor(replan, "right")], color="#e11d48", label="veto")
    arrow(c.draw, [anchor(replan, "top"), (975, 650), (1315, 650), anchor(planner, "bottom")], color="#e11d48")
    arrow(c.draw, [anchor(critic, "right"), anchor(env_step, "left")], color="#0f766e", label="approved")
    arrow(c.draw, [anchor(env_step, "bottom"), anchor(explainer, "top")], color="#0f766e")
    arrow(c.draw, [anchor(explainer, "bottom"), anchor(reward, "top")], color="#64748b")
    arrow(c.draw, [anchor(reward, "bottom"), anchor(update, "top")], color="#b45309")
    # Feedback path from the update box around the bottom of the middle group to
    # the bandit box; the label offset is relative to the final segment's midpoint.
    arrow(c.draw, [anchor(update, "left"), (1585, 922), (1585, 1055), (800, 1055), (800, 566), anchor(bandit, "left")], color="#b45309", label="reward learning", label_offset=(-170, 10))
    return c.save("multi_agent_orchestration")
|
|
|
|
def reward_decomposition() -> Path:
    """Render the reward decomposition chart and return the saved PNG path.

    An action box feeds a checks box, which fans out to four channel groups.
    Each group lists its components as pills, and every group then points at
    a single total box at the bottom.
    """
    c = Chart(2500, 1420, "Reward Decomposition", "Verifier-backed rewards remain inspectable through component columns and judge-friendly primary channels.")
    action = c.box(Rect(930, 210, 640, 92), "Candidate action", "selected legal candidate or fallback", palette=BLUE)
    checks = c.box(Rect(800, 360, 900, 94), "Verifier + Transition + Anti-Cheat + Uncertainty", palette=VIOLET, title_size=25)
    arrow(c.draw, [anchor(action, "bottom"), anchor(checks, "top")], color="#7c3aed")
    # One entry per channel: (group rect, group title, subtitle, component names, palette).
    channel_specs = [
        (
            Rect(140, 575, 500, 455),
            "safety_legality",
            "legal and safe action choice",
            ["format compliance", "candidate alignment", "legality", "safety delta"],
            ROSE,
        ),
        (
            Rect(730, 575, 500, 455),
            "clinical_improvement",
            "clinical risk moves in the right direction",
            ["burden improvement", "disease stability"],
            TEAL,
        ),
        (
            Rect(1320, 575, 500, 455),
            "dosing_quality",
            "dose-sensitive decisions are handled",
            ["dosing quality"],
            AMBER,
        ),
        (
            Rect(1910, 575, 500, 455),
            "process_integrity",
            "process, uncertainty, and anti-cheat safeguards",
            ["abstention quality", "efficiency", "process fidelity", "explanation grounding", "anti-cheat", "uncertainty calibration"],
            VIOLET,
        ),
    ]
    channels: list[Rect] = []
    for rect, title, subtitle, components, pal in channel_specs:
        c.group(rect, title, pal)
        subtitle_font = font(21)
        lines = wrap_lines(c.draw, subtitle, subtitle_font, rect.w - 64)
        c.draw.multiline_text((rect.x + 32, rect.y + 86), "\n".join(lines), font=subtitle_font, fill=MUTED, spacing=5)
        # Groups with more than four components use smaller, more tightly spaced pills.
        compact = len(components) > 4
        y = rect.y + (148 if compact else 155)
        pill_h = 42 if compact else 54
        step = 50 if compact else 66
        for item in components:
            c.pill(Rect(rect.x + 44, y, rect.w - 88, pill_h), item, pal, size=17 if compact else 19)
            y += step
        channels.append(rect)
        # Checks box -> this group, bending at y=520 above the group's centre;
        # the arrow uses the palette's accent colour.
        arrow(c.draw, [anchor(checks, "bottom"), (rect.x + rect.w // 2, 520), anchor(rect, "top")], color=pal[1], width=3)
    total = c.box(Rect(930, 1230, 640, 102), "total_reward", "clamped to 0.001 - 0.999", palette=BLUE, title_size=28)
    for ch in channels:
        arrow(c.draw, [anchor(ch, "bottom"), anchor(total, "top")], color="#2563eb", width=3)
    return c.save("reward_decomposition")
|
|
|
|
def episode_state_machine() -> Path:
    """Render the episode state machine chart and return the saved PNG path.

    State boxes are stored by name in ``nodes``; arrows connect them into the
    main chain, the legal/blocked branch, the reward step and the continue
    loop, and a column of terminal-reason pills points at the Done box.
    """
    c = Chart(2250, 1120, "Episode State Machine", "Terminal reasons are explicit, making rollouts auditable and reward hacking visible.")
    # State name -> rect of its box (boxes are drawn as the dict is built).
    nodes = {
        "Start": c.box(Rect(100, 520, 190, 82), "Start", palette=BLUE),
        "Reset": c.box(Rect(390, 520, 190, 82), "Reset", palette=BLUE),
        "Observe": c.box(Rect(680, 520, 220, 82), "Observe", palette=TEAL),
        "Select": c.box(Rect(1020, 500, 260, 122), "Candidate Selection", palette=TEAL),
        "Verify": c.box(Rect(1420, 500, 240, 122), "Verification", palette=VIOLET),
        "Transition": c.box(Rect(1810, 395, 245, 90), "Transition", palette=TEAL),
        "Rollback": c.box(Rect(1810, 610, 245, 90), "Rollback", palette=ROSE),
        "Reward": c.box(Rect(1450, 820, 250, 96), "Reward Scoring", palette=AMBER),
        "Continue": c.box(Rect(980, 820, 250, 96), "Continue", palette=SLATE),
        "Done": c.box(Rect(1950, 820, 220, 96), "Done", palette=BLUE),
    }
    # Main left-to-right chain.
    chain = ["Start", "Reset", "Observe", "Select", "Verify"]
    for a, b in zip(chain, chain[1:]):
        arrow(c.draw, [anchor(nodes[a], "right"), anchor(nodes[b], "left")], color="#475569")
    # Verification branches to Transition ("legal") or Rollback ("blocked").
    arrow(c.draw, [anchor(nodes["Verify"], "right"), anchor(nodes["Transition"], "left")], color="#0f766e", label="legal")
    arrow(c.draw, [anchor(nodes["Verify"], "right"), (1725, 560), anchor(nodes["Rollback"], "left")], color="#e11d48", label="blocked")
    # Both branches reach Reward through the same bend point at (1930, 780).
    arrow(c.draw, [anchor(nodes["Transition"], "bottom"), (1930, 780), anchor(nodes["Reward"], "right")], color="#b45309")
    arrow(c.draw, [anchor(nodes["Rollback"], "bottom"), (1930, 780), anchor(nodes["Reward"], "right")], color="#b45309")
    arrow(c.draw, [anchor(nodes["Reward"], "left"), anchor(nodes["Continue"], "right")], color="#64748b", label="budget remains")
    # Loop back from Continue to Observe.
    arrow(c.draw, [anchor(nodes["Continue"], "top"), (1105, 690), (790, 690), anchor(nodes["Observe"], "bottom")], color="#64748b")
    reasons = ["safe resolution", "review escalation", "exploit detected", "timeout", "budget exhausted"]
    for i, reason in enumerate(reasons):
        # Pills are stacked 50 apart; each arrow starts at the pill's right edge, mid-height.
        y = 760 + i * 50
        c.pill(Rect(1735, y, 175, 36), reason, SLATE, size=16)
        arrow(c.draw, [(1910, y + 18), anchor(nodes["Done"], "left")], color="#2563eb", width=2)
    return c.save("episode_state_machine")
|
|
|
|
def deployment_topology() -> Path:
    """Render the deployment topology chart and return the saved PNG path.

    Four groups are drawn: a local-machine column on the left, two stacked
    Space groups in the middle, and a hub column on the right. Arrows show
    the repo fanning out to local services and to both Space bundles, and the
    bundles and runner linking to boxes in the hub group.
    """
    c = Chart(2400, 1380, "Deployment Topology", "Local services, public product Space, private training Space, and artifact exchange on Hugging Face Hub.")
    local = Rect(100, 245, 580, 830)
    product = Rect(810, 245, 600, 350)
    training = Rect(810, 725, 600, 350)
    hub = Rect(1540, 245, 760, 830)
    for rect, title, pal in [
        (local, "Local Developer Machine", BLUE),
        (product, "Public Product Space", TEAL),
        (training, "Private Training Space", VIOLET),
        (hub, "Hugging Face Hub", AMBER),
    ]:
        c.group(rect, title, pal)
    # Local group.
    repo = c.box(Rect(240, 365, 300, 86), "polyguard-rl repo", palette=BLUE, title_size=22)
    local_runtime = c.box(Rect(165, 545, 210, 82), "Local API", ":8200", palette=VIOLET, title_size=21)
    local_env = c.box(Rect(405, 545, 210, 82), "OpenEnv", ":8201", palette=AMBER, title_size=21)
    vite = c.box(Rect(165, 695, 210, 82), "Vite UI", ":5173", palette=BLUE, title_size=21)
    checks = c.box(Rect(405, 695, 210, 82), "Checks", "pytest / validate / gate", palette=SLATE, title_size=21)
    # Product Space group.
    space_bundle = c.box(Rect(955, 365, 310, 84), "Product Docker Bundle", palette=TEAL, title_size=22)
    product_runtime = c.box(Rect(890, 500, 205, 76), "FastAPI Runtime", palette=TEAL, title_size=19)
    product_ui = c.box(Rect(1135, 500, 205, 76), "React Workbench", palette=TEAL, title_size=19)
    # Training Space group.
    train_bundle = c.box(Rect(955, 845, 310, 84), "Training Docker Space", palette=VIOLET, title_size=22)
    runner = c.box(Rect(890, 980, 205, 76), "Gradio Runner", palette=VIOLET, title_size=19)
    gpu = c.box(Rect(1135, 980, 205, 76), "HF GPU A10G", palette=VIOLET, title_size=19)
    # Hub group.
    product_repo = c.box(Rect(1625, 360, 265, 86), "Product Space Repo", "polyguard-openenv", palette=AMBER, title_size=21)
    training_repo = c.box(Rect(1975, 360, 240, 86), "Training Space Repo", palette=AMBER, title_size=21)
    artifact_repo = c.box(Rect(1625, 610, 265, 86), "Artifact Repo", "adapters / reports", palette=AMBER, title_size=21)
    evidence_repo = c.box(Rect(1975, 610, 240, 86), "Evidence Space", palette=AMBER, title_size=21)
    docs = c.box(Rect(1780, 850, 275, 86), "Local docs/results", "pulled evidence", palette=SLATE, title_size=21)
    # Repo fans out to the four local boxes.
    for target in [local_runtime, local_env, vite, checks]:
        arrow(c.draw, [anchor(repo, "bottom"), anchor(target, "top")], color="#2563eb")
    arrow(c.draw, [anchor(repo, "right"), anchor(space_bundle, "left")], color="#0f766e", label="deploy product")
    # Routed through (745, 885), in the gap between the local and Space groups.
    arrow(c.draw, [anchor(repo, "right"), (745, 885), anchor(train_bundle, "left")], color="#7c3aed", label="deploy training")
    arrow(c.draw, [anchor(space_bundle, "right"), anchor(product_repo, "left")], color="#0f766e")
    arrow(c.draw, [anchor(space_bundle, "bottom"), anchor(product_runtime, "top")], color="#0f766e")
    arrow(c.draw, [anchor(space_bundle, "bottom"), anchor(product_ui, "top")], color="#0f766e")
    arrow(c.draw, [anchor(train_bundle, "right"), anchor(training_repo, "left")], color="#7c3aed")
    arrow(c.draw, [anchor(train_bundle, "bottom"), anchor(runner, "top")], color="#7c3aed")
    arrow(c.draw, [anchor(runner, "right"), anchor(gpu, "left")], color="#7c3aed")
    arrow(c.draw, [anchor(runner, "right"), anchor(artifact_repo, "left")], color="#b45309", label="upload")
    arrow(c.draw, [anchor(artifact_repo, "right"), anchor(evidence_repo, "left")], color="#b45309")
    arrow(c.draw, [anchor(artifact_repo, "bottom"), anchor(docs, "top")], color="#64748b", label="pull")
    return c.save("deployment_topology")
|
|
|
|
def evidence_generation_flow() -> Path:
    """Render the evidence generation flow chart and return the saved PNG path.

    The chart has no group panels: boxes are laid out left to right and
    connected by arrows that fan out and back in between stages.
    """
    c = Chart(2300, 980, "Evidence Generation Flow", "Training outputs are converted into reviewer-facing reports, plots, bundles, and README claims.")
    train = c.box(Rect(100, 435, 250, 96), "SFT / GRPO Runs", palette=VIOLET)
    reports = c.box(Rect(465, 320, 260, 90), "Run Reports", palette=AMBER)
    checkpoints = c.box(Rect(465, 560, 260, 90), "Adapters + Merged Artifacts", palette=AMBER, title_size=22)
    pull = c.box(Rect(850, 435, 260, 96), "Pull Training Artifacts", palette=BLUE)
    post = c.box(Rect(1250, 260, 290, 90), "Post-Save Inference", palette=TEAL)
    ablations = c.box(Rect(1250, 435, 290, 90), "Policy-Stack Ablations", palette=TEAL)
    benchmarks = c.box(Rect(1250, 610, 290, 90), "Benchmarks + Robustness", palette=TEAL)
    charts = c.box(Rect(1655, 435, 210, 90), "Charts", palette=ROSE)
    results = c.box(Rect(1955, 320, 260, 80), "docs/results", palette=SLATE, title_size=22)
    bundle = c.box(Rect(1955, 455, 260, 80), "Submission Bundle", palette=SLATE, title_size=22)
    readme = c.box(Rect(1955, 590, 260, 80), "README Claims", palette=SLATE, title_size=22)
    # Training runs split into reports and checkpoints, which both feed the pull step.
    arrow(c.draw, [anchor(train, "right"), anchor(reports, "left")], color="#b45309")
    arrow(c.draw, [anchor(train, "right"), anchor(checkpoints, "left")], color="#b45309")
    arrow(c.draw, [anchor(reports, "right"), anchor(pull, "left")], color="#2563eb")
    arrow(c.draw, [anchor(checkpoints, "right"), anchor(pull, "left")], color="#2563eb")
    # The pull step fans out to three evaluation boxes, each of which feeds the charts box.
    for target in [post, ablations, benchmarks]:
        arrow(c.draw, [anchor(pull, "right"), anchor(target, "left")], color="#0f766e")
        arrow(c.draw, [anchor(target, "right"), anchor(charts, "left")], color="#e11d48")
    # The charts box fans out to the three output boxes.
    arrow(c.draw, [anchor(charts, "right"), anchor(results, "left")], color="#64748b")
    arrow(c.draw, [anchor(charts, "right"), anchor(bundle, "left")], color="#64748b")
    arrow(c.draw, [anchor(charts, "right"), anchor(readme, "left")], color="#64748b")
    return c.save("evidence_generation_flow")
|
|
|
|
def frontend_runtime_surface() -> Path:
    """Render the frontend runtime surface chart and return the saved PNG path.

    Three column groups are drawn: page boxes on the left, endpoint cards in
    the middle (one per page, paired in order), and backend runtime boxes on
    the right.
    """
    c = Chart(2300, 1350, "Frontend Runtime Surface", "React pages map to concrete FastAPI endpoints used by the Patient Workbench and supporting views.")
    pages_group = Rect(100, 245, 560, 965)
    api_group = Rect(780, 245, 720, 965)
    runtime_group = Rect(1640, 245, 560, 965)
    c.group(pages_group, "React App Pages", BLUE)
    c.group(api_group, "API Endpoints", TEAL)
    c.group(runtime_group, "Backend Runtime", VIOLET)
    app = c.box(Rect(230, 365, 300, 90), "React App", palette=BLUE)
    pages = [
        c.box(Rect(190, 525, 340, 78), "Patient Workbench", palette=BLUE, title_size=21),
        c.box(Rect(190, 665, 340, 78), "Policy Lab", palette=BLUE, title_size=21),
        c.box(Rect(190, 805, 340, 78), "Safety + Dosing Views", palette=BLUE, title_size=21),
        c.box(Rect(190, 945, 340, 78), "Replay + Training Views", palette=BLUE, title_size=21),
    ]
    c.pill(Rect(190, 1090, 340, 48), "shared fetchJson client", BLUE, size=18)
    # App box -> first page, then a top-to-bottom chain through the pages.
    arrow(c.draw, [anchor(app, "bottom"), anchor(pages[0], "top")], color="#2563eb", width=3)
    for a, b in zip(pages, pages[1:]):
        arrow(c.draw, [anchor(a, "bottom"), anchor(b, "top")], color="#2563eb", width=3)

    # Each card's body lists its endpoints, one per line (explicit "\n" breaks).
    endpoint_cards = [
        c.box(
            Rect(900, 485, 470, 116),
            "Session + Step",
            "POST /env/reset\nPOST /env/step_candidate",
            palette=TEAL,
            title_size=22,
            body_size=18,
        ),
        c.box(
            Rect(900, 655, 470, 132),
            "Policy + Safety",
            "POST /agents/orchestrate\nGET /env/reward_breakdown\nGET /policy/model_status",
            palette=TEAL,
            title_size=22,
            body_size=17,
        ),
        c.box(
            Rect(900, 850, 470, 116),
            "Evaluation",
            "POST /eval/run_baselines\nPOST /eval/run_dosing",
            palette=TEAL,
            title_size=22,
            body_size=18,
        ),
        c.box(
            Rect(900, 1035, 470, 116),
            "Trace + Metrics",
            "GET /env/trace\nGET /metrics/training",
            palette=TEAL,
            title_size=22,
            body_size=18,
        ),
    ]
    # Pages and endpoint cards are paired by position in their lists.
    for page, endpoint in zip(pages, endpoint_cards):
        arrow(c.draw, [anchor(page, "right"), anchor(endpoint, "left")], color="#0f766e", width=3)
    c.pill(Rect(945, 1170, 380, 48), "all calls use API_BASE", TEAL, size=18)

    api = c.box(Rect(1785, 395, 270, 96), "FastAPI API", palette=VIOLET)
    env = c.box(Rect(1785, 610, 270, 96), "PolyGuardEnv", palette=AMBER)
    policy = c.box(Rect(1785, 825, 270, 96), "Policy Runtime", palette=MINT)
    evals = c.box(Rect(1785, 1040, 270, 96), "Eval + Metrics", palette=SLATE)
    # This arrow starts at the right edge of the whole API group, not at a single card.
    arrow(c.draw, [anchor(api_group, "right"), anchor(api, "left")], color="#7c3aed", width=4, label="fetchJson")
    arrow(c.draw, [anchor(api, "bottom"), anchor(env, "top")], color="#b45309")
    arrow(c.draw, [anchor(env, "bottom"), anchor(policy, "top")], color="#0891b2")
    arrow(c.draw, [anchor(policy, "bottom"), anchor(evals, "top")], color="#64748b")
    return c.save("frontend_runtime_surface")
|
|
|
|
# Every chart renderer, in the order main() calls them; each takes no
# arguments and returns the path of the PNG it saved.
RENDERERS = [
    system_architecture,
    runtime_step_flow,
    data_training_pipeline,
    multi_agent_orchestration,
    reward_decomposition,
    episode_state_machine,
    deployment_topology,
    evidence_generation_flow,
    frontend_runtime_surface,
]
|
|
|
|
def main() -> None:
    """Render every chart in ``RENDERERS`` and print the saved paths.

    After a successful run ``OUT_DIR`` contains exactly the PNGs produced by
    this run. Rendering happens before any cleanup: if a renderer raises
    (for example because a font cannot be loaded), previously generated
    diagrams are left in place instead of having been deleted up front.
    """
    OUT_DIR.mkdir(parents=True, exist_ok=True)
    rendered = [renderer() for renderer in RENDERERS]

    # Remove only PNGs that this run did not produce (e.g. renamed or retired charts).
    fresh = set(rendered)
    for existing in OUT_DIR.glob("*.png"):
        if existing not in fresh:
            existing.unlink()

    print("rendered_diagrams:")
    for path in rendered:
        print(path.relative_to(ROOT))
|
|
|
|
# Render all charts only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|