Spaces:

TheJackBright
/

polyguard-openenv-workbench

Sleeping

File size: 36,536 Bytes

f8a246b

#!/usr/bin/env python3
"""Render polished PolyGuard architecture diagrams as individual PNG charts."""

from __future__ import annotations

import math
import textwrap
from dataclasses import dataclass
from pathlib import Path
from typing import Iterable

from PIL import Image, ImageDraw, ImageFont


# Directory one level above the folder containing this script (presumably the repository root).
ROOT = Path(__file__).resolve().parents[1]
# Destination folder for the rendered PNG files; Chart.save() creates it on demand.
OUT_DIR = ROOT / "docs" / "assets" / "diagrams"

# TrueType files loaded by font().
# NOTE(review): these look like macOS system font locations - confirm availability on other platforms.
FONT_REGULAR = "/System/Library/Fonts/Supplemental/Arial.ttf"
FONT_BOLD = "/System/Library/Fonts/Supplemental/Arial Bold.ttf"

# Base colours: canvas background, primary text, secondary text, default arrow colour, box fill.
BG = "#f6f8fb"
INK = "#172033"
MUTED = "#64748b"
LINE = "#718096"
WHITE = "#ffffff"

# Two-tone palettes stored as (light fill, strong accent); Chart.group/box/pill unpack them in that order.
BLUE = ("#eaf3ff", "#2563eb")
VIOLET = ("#f3edff", "#7c3aed")
TEAL = ("#e8f8f2", "#0f766e")
AMBER = ("#fff5df", "#b45309")
ROSE = ("#fff1f3", "#e11d48")
SLATE = ("#eef2f7", "#475569")
MINT = ("#e7f7fb", "#0891b2")


@dataclass(frozen=True)
class Rect:
    """Immutable axis-aligned rectangle described by its top-left corner and its size."""
    # Left edge and top edge, in canvas pixels.
    x: int
    y: int
    # Width and height, in canvas pixels.
    w: int
    h: int


# Font file names tried when the preferred path cannot be opened, keyed by the bold flag.
# Bare file names are used so that Pillow can search the platform's font directories.
_FONT_FALLBACKS: dict[bool, tuple[str, ...]] = {
    False: ("Arial.ttf", "arial.ttf", "DejaVuSans.ttf", "LiberationSans-Regular.ttf"),
    True: ("Arial Bold.ttf", "arialbd.ttf", "DejaVuSans-Bold.ttf", "LiberationSans-Bold.ttf"),
}

# Fonts already loaded, keyed by (size, bold), so each face is read from disk only once.
_FONT_CACHE: dict[tuple[int, bool], ImageFont.FreeTypeFont] = {}


def font(size: int, bold: bool = False) -> ImageFont.FreeTypeFont:
    """Return a font of the requested pixel size, loading it at most once.

    The preferred file is ``FONT_BOLD`` or ``FONT_REGULAR``.  When that path
    cannot be opened, a short list of common substitutes is tried, and finally
    Pillow's built-in scalable default font.

    Args:
        size: Font size passed to Pillow.
        bold: Select the bold face when true.

    Returns:
        The loaded font; repeated calls with the same arguments return the
        same object.

    Raises:
        OSError: If no candidate could be opened and the installed Pillow has
            no scalable default font.
    """
    key = (size, bool(bold))
    cached = _FONT_CACHE.get(key)
    if cached is not None:
        return cached

    preferred = FONT_BOLD if bold else FONT_REGULAR
    candidates = (preferred, *_FONT_FALLBACKS[bool(bold)])
    loaded = None
    for candidate in candidates:
        try:
            loaded = ImageFont.truetype(candidate, size)
        except OSError:
            # Missing or unreadable file: move on to the next candidate.
            continue
        break

    if loaded is None:
        try:
            # Newer Pillow releases accept ``size`` here and return a scalable font.
            loaded = ImageFont.load_default(size=size)
        except TypeError as exc:
            raise OSError(f"No usable TrueType font found; tried {candidates!r}") from exc

    _FONT_CACHE[key] = loaded
    return loaded


def text_size(draw: ImageDraw.ImageDraw, text: str, fnt: ImageFont.FreeTypeFont) -> tuple[int, int]:
    """Measure *text* rendered with *fnt* and return ``(width, height)``.

    Empty text is reported as ``(0, 0)`` without consulting the drawing
    context; otherwise the size is derived from the bounding box that
    ``draw.textbbox`` reports for the text placed at the origin.
    """
    if text:
        left, top, right, bottom = draw.textbbox((0, 0), text, font=fnt)
        return right - left, bottom - top
    return 0, 0


def wrap_lines(draw: ImageDraw.ImageDraw, text: str, fnt: ImageFont.FreeTypeFont, max_width: int) -> list[str]:
    """Greedily wrap *text* so each rendered line fits within *max_width* pixels.

    Explicit newlines in *text* are kept as line breaks, and a blank (or
    whitespace-only) segment yields an empty line.  Words are packed onto a
    line while the measured width stays within *max_width*.  A single word that
    is wider than *max_width* is split into character chunks, wherever it falls
    on the line.

    Args:
        draw: Drawing context used for measuring text.
        text: Text to wrap; may contain ``"\\n"``.
        fnt: Font used for measuring; its ``size`` attribute drives the chunk
            width estimate for over-long words.
        max_width: Maximum line width in pixels.

    Returns:
        The wrapped lines, in order.
    """
    lines: list[str] = []
    for part in text.split("\n"):
        if not part.strip():
            lines.append("")
            continue
        current = ""
        for word in part.split():
            candidate = f"{current} {word}" if current else word
            if text_size(draw, candidate, fnt)[0] <= max_width:
                current = candidate
                continue
            # The word does not fit on the current line: flush what we have.
            if current:
                lines.append(current)
            if text_size(draw, word, fnt)[0] <= max_width:
                current = word
                continue
            # The word alone is too wide. Chunk it by an estimated character
            # budget (one font-size of width per character, at least 8).
            chunks = textwrap.wrap(word, width=max(8, max_width // max(1, fnt.size)))
            lines.extend(chunks[:-1])
            current = chunks[-1] if chunks else word
        if current:
            lines.append(current)
    return lines


def draw_centered_lines(
    draw: ImageDraw.ImageDraw,
    lines: Iterable[str],
    fnt: ImageFont.FreeTypeFont,
    x: int,
    y: int,
    w: int,
    fill: str = INK,
    line_gap: int = 7,
) -> int:
    """Draw *lines* top to bottom, each centred in the span ``[x, x + w]``.

    Drawing starts at vertical position *y*.  After each line the cursor
    advances by that line's measured height plus *line_gap*.

    Returns:
        The vertical position immediately after the last line's gap.
    """
    cursor = y
    for entry in lines:
        entry_w, entry_h = text_size(draw, entry, fnt)
        left = x + (w - entry_w) / 2
        draw.text((left, cursor), entry, font=fnt, fill=fill)
        cursor += entry_h + line_gap
    return cursor


def rounded(
    draw: ImageDraw.ImageDraw,
    rect: Rect,
    fill: str,
    outline: str = "#d5deea",
    width: int = 2,
    radius: int = 22,
    shadow: bool = True,
) -> None:
    """Draw a rounded rectangle covering *rect*, optionally over a drop shadow.

    When *shadow* is true, a flat grey copy of the shape is painted first,
    shifted 8 px right and 10 px down, so the real shape appears raised.
    """
    left, top = rect.x, rect.y
    right, bottom = left + rect.w, top + rect.h
    if shadow:
        offset_box = (left + 8, top + 10, right + 8, bottom + 10)
        draw.rounded_rectangle(offset_box, radius=radius, fill="#dfe6ef")
    draw.rounded_rectangle((left, top, right, bottom), radius=radius, fill=fill, outline=outline, width=width)


def anchor(rect: Rect, side: str) -> tuple[int, int]:
    """Return the connection point of *rect* for the given *side*.

    ``"top"``, ``"bottom"``, ``"left"`` and ``"right"`` give the midpoint of
    that edge; any other value gives the centre of the rectangle.
    """
    mid_x = rect.x + rect.w // 2
    mid_y = rect.y + rect.h // 2
    edge_points = {
        "top": (mid_x, rect.y),
        "bottom": (mid_x, rect.y + rect.h),
        "left": (rect.x, mid_y),
        "right": (rect.x + rect.w, mid_y),
    }
    return edge_points.get(side, (mid_x, mid_y))


def arrow(
    draw: ImageDraw.ImageDraw,
    points: list[tuple[int, int]],
    color: str = LINE,
    width: int = 4,
    label: str | None = None,
    label_offset: tuple[int, int] = (0, -26),
) -> None:
    """Draw a polyline through *points* ending in a filled arrowhead.

    The arrowhead points along the final segment.  When *label* is given, it is
    drawn on a white rounded badge placed at the midpoint of the final segment,
    shifted by *label_offset*.  With fewer than two points only the line call
    is made.
    """
    draw.line(points, fill=color, width=width, joint="curve")
    if len(points) < 2:
        return

    (tail_x, tail_y), (tip_x, tip_y) = points[-2], points[-1]
    heading = math.atan2(tip_y - tail_y, tip_x - tail_x)
    head_len = 17
    spread = math.pi / 7
    # Two wing points behind the tip, one on each side of the heading.
    wings = [
        (tip_x - head_len * math.cos(heading + turn), tip_y - head_len * math.sin(heading + turn))
        for turn in (-spread, spread)
    ]
    draw.polygon([(tip_x, tip_y), *wings], fill=color)

    if not label:
        return
    mid_x = (tail_x + tip_x) // 2 + label_offset[0]
    mid_y = (tail_y + tip_y) // 2 + label_offset[1]
    label_font = font(22, bold=True)
    text_w, text_h = text_size(draw, label, label_font)
    pad_x, pad_y = 12, 6
    badge = (
        mid_x - text_w / 2 - pad_x,
        mid_y - pad_y,
        mid_x + text_w / 2 + pad_x,
        mid_y + text_h + pad_y,
    )
    draw.rounded_rectangle(badge, radius=12, fill=WHITE, outline="#dbe3ee")
    draw.text((mid_x - text_w / 2, mid_y), label, font=label_font, fill=color)


class Chart:
    """A single diagram canvas with helpers for titled groups, boxes and pills."""

    def __init__(self, width: int, height: int, title: str, subtitle: str = "") -> None:
        """Create a ``width`` x ``height`` RGB canvas and paint its heading."""
        self.width = width
        self.height = height
        self.image = Image.new("RGB", (width, height), BG)
        self.draw = ImageDraw.Draw(self.image)
        self.title(title, subtitle)

    def title(self, title: str, subtitle: str = "") -> None:
        """Paint the top accent bar, the chart title and an optional subtitle."""
        canvas = self.draw
        canvas.rectangle((0, 0, self.width, 14), fill="#1d4ed8")
        canvas.text((76, 54), title, font=font(58, bold=True), fill=INK)
        if not subtitle:
            return
        canvas.text((78, 126), subtitle, font=font(26), fill=MUTED)

    def group(self, rect: Rect, title: str, palette: tuple[str, str]) -> None:
        """Draw a panel over *rect* with a 64 px coloured header holding *title*."""
        panel_fill, header_fill = palette
        rounded(self.draw, rect, fill=panel_fill, outline="#cbd5e1", width=2, radius=30, shadow=False)
        left, top, right = rect.x, rect.y, rect.x + rect.w
        header_bottom = top + 64
        self.draw.rounded_rectangle((left, top, right, header_bottom), radius=30, fill=header_fill)
        # Square off the lower corners of the header by overpainting its bottom part.
        self.draw.rectangle((left, top + 34, right, header_bottom), fill=header_fill)
        self.draw.text((left + 26, top + 18), title, font=font(26, bold=True), fill=WHITE)

    def box(
        self,
        rect: Rect,
        title: str,
        body: str = "",
        palette: tuple[str, str] = SLATE,
        title_size: int = 25,
        body_size: int = 20,
        center: bool = True,
    ) -> Rect:
        """Draw a white card with an accent strip, a bold title and optional body text.

        Text is wrapped to the card width.  With *center* true the text block is
        centred horizontally and vertically (never closer than 18 px to the top);
        otherwise it is left-aligned starting 20 px below the top edge.

        Returns:
            *rect*, so callers can keep it for anchoring arrows.
        """
        _, accent = palette
        canvas = self.draw
        rounded(canvas, rect, fill=WHITE, outline="#cbd5e1", width=2, radius=20, shadow=True)
        canvas.rounded_rectangle((rect.x, rect.y, rect.x + 10, rect.y + rect.h), radius=20, fill=accent)

        title_font = font(title_size, bold=True)
        body_font = font(body_size)
        inner_w = rect.w - 46
        title_lines = wrap_lines(canvas, title, title_font, inner_w)
        body_lines = wrap_lines(canvas, body, body_font, inner_w) if body else []

        def stacked_height(rows: list[str], fnt: ImageFont.FreeTypeFont, spacing: int) -> int:
            # Sum of measured line heights plus the gaps between consecutive lines.
            measured = sum(text_size(canvas, row, fnt)[1] for row in rows)
            return measured + max(0, len(rows) - 1) * spacing

        title_h = stacked_height(title_lines, title_font, 7)
        body_h = stacked_height(body_lines, body_font, 6)
        gap = 10 if body_lines else 0

        if center:
            cursor = rect.y + max(18, (rect.h - (title_h + body_h + gap)) // 2)
            cursor = draw_centered_lines(canvas, title_lines, title_font, rect.x + 22, cursor, inner_w, INK)
            if body_lines:
                draw_centered_lines(canvas, body_lines, body_font, rect.x + 22, cursor + gap, inner_w, MUTED, line_gap=6)
            return rect

        text_x = rect.x + 28
        cursor = rect.y + 20
        canvas.multiline_text((text_x, cursor), "\n".join(title_lines), font=title_font, fill=INK, spacing=7)
        if body_lines:
            canvas.multiline_text((text_x, cursor + title_h + gap), "\n".join(body_lines), font=body_font, fill=MUTED, spacing=6)
        return rect

    def pill(self, rect: Rect, text: str, palette: tuple[str, str], size: int = 21) -> Rect:
        """Draw a capsule-shaped label filling *rect* with bold centred *text*.

        Returns:
            *rect* unchanged.
        """
        fill, accent = palette
        self.draw.rounded_rectangle(
            (rect.x, rect.y, rect.x + rect.w, rect.y + rect.h),
            radius=rect.h // 2,
            fill=fill,
            outline=accent,
            width=2,
        )
        label_font = font(size, bold=True)
        usable_w = rect.w - 28
        lines = wrap_lines(self.draw, text, label_font, usable_w)
        # Vertical centring uses a per-line estimate of font size plus 6 px.
        estimated_h = len(lines) * (size + 6)
        first_y = rect.y + (rect.h - estimated_h) // 2
        draw_centered_lines(self.draw, lines, label_font, rect.x + 14, first_y, usable_w, INK, line_gap=4)
        return rect

    def save(self, name: str) -> Path:
        """Write the canvas to ``OUT_DIR/<name>.png`` and return that path."""
        OUT_DIR.mkdir(parents=True, exist_ok=True)
        target = OUT_DIR / f"{name}.png"
        self.image.save(target, quality=96)
        return target


def system_architecture() -> Path:
    """Render the layered system-architecture chart and return the saved PNG path.

    Five titled group panels are drawn first, then the boxes inside each
    panel, then the arrows. Later draw calls paint over earlier ones, so the
    statement order below determines what ends up on top.
    """
    c = Chart(
        2400,
        1500,
        "PolyGuard System Architecture",
        "Research environment, policy stack, OpenEnv runtime, model artifacts, and evidence outputs.",
    )
    # Panel frames: three full-width layers stacked vertically, then two half-width panels side by side.
    clients = Rect(110, 190, 2180, 190)
    api = Rect(110, 440, 2180, 190)
    agents = Rect(110, 690, 2180, 260)
    runtime = Rect(110, 1015, 1050, 300)
    assets = Rect(1240, 1015, 1050, 300)
    for rect, title, pal in [
        (clients, "User And Integration Surfaces", BLUE),
        (api, "API And OpenEnv Surface", VIOLET),
        (agents, "Multi-Agent Policy Stack", TEAL),
        (runtime, "OpenEnv Runtime And Rewards", AMBER),
        (assets, "Data, Models, And Evidence Outputs", MINT),
    ]:
        c.group(rect, title, pal)

    # The boxes are drawn as a side effect of building this list; the list itself is not read again.
    client_boxes = [
        c.box(Rect(185, 275, 390, 78), "React Patient Workbench", palette=BLUE),
        c.box(Rect(665, 275, 350, 78), "Public HF Space", palette=BLUE),
        c.box(Rect(1105, 275, 380, 78), "One-Run Notebook / CLI", palette=BLUE),
        c.box(Rect(1575, 275, 365, 78), "OpenEnv Validator", palette=BLUE),
    ]
    api_boxes = [
        c.box(Rect(260, 530, 330, 72), "app/api/routes.py", palette=VIOLET, title_size=21),
        c.box(Rect(720, 530, 300, 72), "APIService", palette=VIOLET, title_size=21),
        c.box(Rect(1180, 530, 370, 72), "PolicyProviderRouter", palette=VIOLET, title_size=21),
        c.box(Rect(1680, 530, 350, 72), "app/env/fastapi_app.py", palette=VIOLET, title_size=20),
    ]
    # Two rows of agent boxes spaced 405 px apart; the bottom row starts 205 px further right.
    top_agents = [
        c.box(Rect(215 + i * 405, 780, 285, 60), name, palette=TEAL, title_size=19)
        for i, name in enumerate(["MedRec", "Evidence", "Graph Safety", "Dosing", "Candidate"])
    ]
    # NOTE(review): the last box of this row spans x=2040..2325, past the agents panel's
    # right edge at x=2290 - confirm the overhang is intended.
    bottom_agents = [
        c.box(Rect(420 + i * 405, 865, 285, 60), name, palette=TEAL if name != "Contextual Bandit" else AMBER, title_size=19)
        for i, name in enumerate(["Supervisor", "Planner", "Contextual Bandit", "Critic", "Explainer"])
    ]
    runtime_boxes = [
        c.box(Rect(185, 1110, 300, 78), "PolyGuardEnv", "stateful reset / step", palette=AMBER, title_size=22),
        c.box(Rect(545, 1110, 240, 78), "Verifier", "legality gates", palette=AMBER, title_size=21),
        c.box(Rect(845, 1110, 240, 78), "Reward Router", "13 components", palette=AMBER, title_size=21),
        c.box(Rect(345, 1215, 240, 66), "Transition", palette=AMBER, title_size=20),
        c.box(Rect(645, 1215, 240, 66), "Anti-Cheat", palette=AMBER, title_size=20),
    ]
    asset_boxes = [
        c.box(Rect(1305, 1100, 215, 68), "Scenarios", palette=MINT, title_size=20),
        c.box(Rect(1560, 1100, 250, 68), "Drug Knowledge", palette=MINT, title_size=20),
        c.box(Rect(1850, 1100, 250, 68), "Active Qwen", "adapter / merged", palette=MINT, title_size=20),
        c.box(Rect(1305, 1215, 215, 68), "Retrieval Index", palette=MINT, title_size=20),
        c.box(Rect(1560, 1215, 250, 68), "Evaluation Suites", palette=MINT, title_size=20),
        c.box(Rect(1850, 1215, 250, 68), "docs/results", "charts + reports", palette=MINT, title_size=20),
    ]

    # Left-to-right chains inside the API row and both agent rows.
    for a, b in zip(api_boxes, api_boxes[1:]):
        arrow(c.draw, [anchor(a, "right"), anchor(b, "left")], color="#7c3aed")
    for a, b in zip(top_agents, top_agents[1:]):
        arrow(c.draw, [anchor(a, "right"), anchor(b, "left")], color="#0f766e", width=3)
    # This pill is painted after the agent boxes and before the bottom-row arrows.
    c.pill(Rect(990, 845, 420, 44), "routed planning and critique", TEAL, size=18)
    for a, b in zip(bottom_agents, bottom_agents[1:]):
        arrow(c.draw, [anchor(a, "right"), anchor(b, "left")], color="#0f766e", width=3)
    # Links inside the runtime panel.
    arrow(c.draw, [anchor(runtime_boxes[0], "right"), anchor(runtime_boxes[1], "left")], color="#b45309")
    arrow(c.draw, [anchor(runtime_boxes[1], "right"), anchor(runtime_boxes[2], "left")], color="#b45309")
    arrow(c.draw, [anchor(runtime_boxes[0], "bottom"), anchor(runtime_boxes[3], "top")], color="#b45309")
    arrow(c.draw, [anchor(runtime_boxes[2], "bottom"), anchor(runtime_boxes[4], "top")], color="#b45309")
    # Links inside the assets panel: one chain per row.
    arrow(c.draw, [anchor(asset_boxes[0], "right"), anchor(asset_boxes[1], "left")], color="#0891b2")
    arrow(c.draw, [anchor(asset_boxes[1], "right"), anchor(asset_boxes[2], "left")], color="#0891b2")
    arrow(c.draw, [anchor(asset_boxes[3], "right"), anchor(asset_boxes[4], "left")], color="#475569")
    arrow(c.draw, [anchor(asset_boxes[4], "right"), anchor(asset_boxes[5], "left")], color="#475569")
    # Labelled connectors between panels, using fixed coordinates on the panel edges.
    arrow(c.draw, [(1200, 380), (1200, 440)], color="#3b82f6", label="requests")
    arrow(c.draw, [(1200, 630), (1200, 690)], color="#7c3aed", label="orchestrates")
    arrow(c.draw, [(760, 950), (760, 1015)], color="#0f766e", label="safe action")
    arrow(c.draw, [(1725, 950), (1725, 1015)], color="#0891b2", label="model + evidence")
    arrow(c.draw, [(1160, 1165), (1240, 1165)], color="#64748b", label="reports")
    return c.save("system_architecture")


def runtime_step_flow() -> Path:
    """Render the sequence-style runtime step chart and return the saved PNG path.

    Each actor gets a header box with a vertical lifeline below it; messages
    are horizontal labelled arrows between lifelines at fixed heights.
    """
    c = Chart(2400, 1320, "Runtime Step Flow", "How one reset or action moves through UI, API, policy, environment, and reward scoring.")
    # (actor label, left x of its header box, palette)
    actors = [
        ("User", 130, BLUE),
        ("React Workbench", 430, BLUE),
        ("FastAPI APIService", 760, VIOLET),
        ("Orchestrator", 1100, TEAL),
        ("PolyGuardEnv", 1440, AMBER),
        ("Policy Provider", 1780, MINT),
        ("Reward Router", 2090, ROSE),
    ]
    # Maps each actor label to the x coordinate of its lifeline (the header box centre).
    x_positions: dict[str, int] = {}
    for name, x, pal in actors:
        rect = c.box(Rect(x, 210, 220, 82), name, palette=pal, title_size=22)
        x_positions[name] = rect.x + rect.w // 2
        c.draw.line((x_positions[name], 315, x_positions[name], 1185), fill="#d0d9e6", width=3)

    def msg(y: int, src: str, dst: str, label: str, color: str = LINE) -> None:
        """Draw one labelled message arrow at height *y* from *src*'s lifeline to *dst*'s."""
        sx, dx = x_positions[src], x_positions[dst]
        arrow(c.draw, [(sx, y), (dx, y)], color=color, width=4, label=label, label_offset=(0, -34))

    msg(390, "User", "React Workbench", "reset / run")
    msg(500, "React Workbench", "FastAPI APIService", "POST /env/reset")
    msg(610, "FastAPI APIService", "PolyGuardEnv", "reset(seed, task)", "#b45309")
    msg(720, "PolyGuardEnv", "FastAPI APIService", "observation + candidates", "#b45309")
    msg(830, "React Workbench", "FastAPI APIService", "step_candidate or orchestrate")
    msg(940, "FastAPI APIService", "Orchestrator", "agent path", "#0f766e")
    msg(1050, "Orchestrator", "Policy Provider", "optional Qwen selection", "#0891b2")
    msg(1160, "Orchestrator", "PolyGuardEnv", "final action", "#0f766e")
    # NOTE(review): this message is drawn last but sits at y=1020, between the rows at
    # y=940 and y=1050 - confirm the placement is intended.
    msg(1020, "PolyGuardEnv", "Reward Router", "13 components -> 4 channels", "#e11d48")
    c.box(Rect(1290, 1160, 430, 90), "Response", "observation, reward, done, trace, info", palette=SLATE)
    # Short unlabelled arrow ending on the top edge of the Response box.
    arrow(c.draw, [(1440 + 110, 1120), (1505, 1160)], color="#64748b")
    # Return path from the Response box: left along y=1205, then up to y=900.
    arrow(c.draw, [(1290, 1205), (650, 1205), (650, 900)], color="#64748b", label="render updated panels", label_offset=(0, 16))
    return c.save("runtime_step_flow")


def data_training_pipeline() -> Path:
    """Render the four-column data and training pipeline chart; return the PNG path.

    The columns are Sources, DataOps, Post-Training and Validation And Use.
    Boxes are drawn column by column, then the arrows: every source points at
    the last DataOps box, each later column is chained top to bottom, and
    labelled arrows hand over from one column to the next.
    """
    c = Chart(2400, 1320, "Data And Training Pipeline", "From local knowledge and synthetic cases to SFT, GRPO, activation, and inference.")
    for frame, heading, colours in (
        (Rect(90, 220, 430, 880), "Sources", BLUE),
        (Rect(610, 220, 520, 880), "DataOps", TEAL),
        (Rect(1220, 220, 520, 880), "Post-Training", VIOLET),
        (Rect(1830, 220, 480, 880), "Validation And Use", AMBER),
    ):
        c.group(frame, heading, colours)

    def column(left: int, width: int, colours: tuple[str, str], rows: list[tuple[int, int, str, str, int]]) -> list[Rect]:
        # Each row is (top, height, title, body, title_size); an empty body means title only.
        return [
            c.box(Rect(left, top, width, height), heading, body, palette=colours, title_size=size)
            for top, height, heading, body, size in rows
        ]

    def chain(stack: list[Rect], colour: str) -> None:
        # Connect each box to the one below it.
        for upper, lower in zip(stack, stack[1:]):
            arrow(c.draw, [anchor(upper, "bottom"), anchor(lower, "top")], color=colour)

    sources = column(150, 310, BLUE, [
        (325, 76, "Local drug knowledge", "", 21),
        (435, 76, "Synthetic patients", "", 21),
        (545, 76, "Scenario files", "easy / medium / hard", 21),
        (655, 76, "Optional HF data", "", 21),
        (765, 76, "DDI API", "optional", 21),
        (875, 76, "Web fallback", "optional", 21),
    ])
    dataops = column(700, 340, TEAL, [
        (330, 78, "Normalize drugs", "", 22),
        (465, 78, "Build knowledge graph", "", 22),
        (600, 78, "Build retrieval index", "", 22),
        (735, 78, "Build scenarios", "", 22),
        (870, 90, "Build SFT / GRPO corpus", "", 22),
    ])
    training = column(1310, 340, VIOLET, [
        (345, 86, "TRL SFT adapter", "", 22),
        (505, 86, "TRL GRPO", "environment reward", 22),
        (665, 86, "Merge / export adapters", "", 22),
        (825, 86, "Registry + manifests", "", 22),
    ])
    validation = column(1905, 310, AMBER, [
        (345, 86, "Post-save inference", "", 22),
        (505, 86, "Activate model", "", 22),
        (665, 86, "/policy/model_status", "", 21),
        (825, 86, "/policy/infer", "", 21),
    ])

    corpus_step = dataops[-1]
    for origin in sources:
        arrow(c.draw, [anchor(origin, "right"), anchor(corpus_step, "left")], color="#3b82f6", width=3)
    chain(dataops, "#0f766e")
    arrow(c.draw, [anchor(corpus_step, "right"), anchor(training[0], "left")], color="#7c3aed", label="corpus")
    chain(training, "#7c3aed")
    arrow(c.draw, [anchor(training[-1], "right"), anchor(validation[0], "left")], color="#b45309", label="artifact")
    chain(validation, "#b45309")
    return c.save("data_training_pipeline")


def multi_agent_orchestration() -> Path:
    """Render the multi-agent orchestration chart and return the saved PNG path.

    Three panels are laid out left to right: candidate construction, policy
    control, and step closure. Arrows, some routed through explicit waypoints,
    are drawn after all boxes.
    """
    c = Chart(2400, 1250, "Multi-Agent Orchestration", "Specialized agents build a verified candidate, route it through policy control, then close the loop with reward feedback.")
    input_group = Rect(90, 250, 560, 850)
    decision_group = Rect(760, 250, 780, 850)
    closure_group = Rect(1650, 250, 660, 850)
    for rect, title, pal in [
        (input_group, "Candidate Construction", BLUE),
        (decision_group, "Policy Control", VIOLET),
        (closure_group, "Step Closure", AMBER),
    ]:
        c.group(rect, title, pal)

    # Left panel: a vertical stack of boxes, chained top to bottom below.
    inputs = [
        c.box(Rect(210, 360, 320, 72), "State", palette=BLUE, title_size=22),
        c.box(Rect(210, 465, 320, 72), "MedRec", palette=TEAL, title_size=22),
        c.box(Rect(210, 570, 320, 72), "Evidence", palette=TEAL, title_size=22),
        c.box(Rect(210, 675, 320, 72), "Graph Safety", palette=TEAL, title_size=22),
        c.box(Rect(210, 780, 320, 72), "Dosing", palette=TEAL, title_size=22),
        c.box(Rect(210, 885, 320, 82), "Candidate", "legal action set", palette=TEAL, title_size=22),
    ]
    for a, b in zip(inputs, inputs[1:]):
        arrow(c.draw, [anchor(a, "bottom"), anchor(b, "top")], color="#0f766e", width=3)

    # Middle panel: supervisor on top, two rows of two boxes, and a summary pill at the bottom.
    supervisor = c.box(Rect(1020, 360, 260, 82), "Supervisor", "routes context", palette=TEAL, title_size=22)
    bandit = c.box(Rect(850, 525, 250, 82), "Bandit Top-K", "policy shortlist", palette=AMBER, title_size=22)
    planner = c.box(Rect(1190, 525, 250, 82), "Planner", "drafts action", palette=VIOLET, title_size=22)
    critic = c.box(Rect(1190, 700, 250, 82), "Critic", "checks action", palette=ROSE, title_size=22)
    replan = c.box(Rect(850, 700, 250, 82), "Review / Replan", "on veto", palette=ROSE, title_size=22)
    c.pill(Rect(850, 915, 590, 74), "coordination: supervisor routing | veto loop | lightweight debate", SLATE, size=21)

    # Right panel: a vertical stack of four boxes.
    env_step = c.box(Rect(1855, 370, 250, 84), "Env Step", "apply transition", palette=AMBER, title_size=22)
    explainer = c.box(Rect(1855, 540, 250, 84), "Explainer", "grounded rationale", palette=TEAL, title_size=22)
    reward = c.box(Rect(1855, 710, 250, 84), "Reward + Trace", "step feedback", palette=SLATE, title_size=22)
    update = c.box(Rect(1855, 880, 250, 84), "Bandit Update", "learn from reward", palette=AMBER, title_size=22)

    # Candidate box to supervisor, routed up through the gap between the first two panels (x=705).
    arrow(c.draw, [anchor(inputs[-1], "right"), (705, 926), (705, 401), anchor(supervisor, "left")], color="#2563eb", label="candidate")
    # Supervisor fans out to both boxes of the next row through a shared waypoint.
    arrow(c.draw, [anchor(supervisor, "bottom"), (1150, 485), anchor(bandit, "top")], color="#b45309")
    arrow(c.draw, [anchor(supervisor, "bottom"), (1150, 485), anchor(planner, "top")], color="#7c3aed")
    arrow(c.draw, [anchor(bandit, "right"), anchor(planner, "left")], color="#b45309")
    arrow(c.draw, [anchor(planner, "bottom"), anchor(critic, "top")], color="#7c3aed")
    # Veto loop: critic to review/replan, then back to the planner via y=650.
    arrow(c.draw, [anchor(critic, "left"), anchor(replan, "right")], color="#e11d48", label="veto")
    arrow(c.draw, [anchor(replan, "top"), (975, 650), (1315, 650), anchor(planner, "bottom")], color="#e11d48")
    arrow(c.draw, [anchor(critic, "right"), anchor(env_step, "left")], color="#0f766e", label="approved")
    arrow(c.draw, [anchor(env_step, "bottom"), anchor(explainer, "top")], color="#0f766e")
    arrow(c.draw, [anchor(explainer, "bottom"), anchor(reward, "top")], color="#64748b")
    arrow(c.draw, [anchor(reward, "bottom"), anchor(update, "top")], color="#b45309")
    # Feedback path from the update box back to the bandit box, routed below and left of the middle panel's boxes.
    arrow(c.draw, [anchor(update, "left"), (1585, 922), (1585, 1055), (800, 1055), (800, 566), anchor(bandit, "left")], color="#b45309", label="reward learning", label_offset=(-170, 10))
    return c.save("multi_agent_orchestration")


def reward_decomposition() -> Path:
    """Render the reward decomposition chart and return the saved PNG path.

    A candidate-action box feeds a checks box, which fans out to four channel
    panels. Each panel lists its components as pills, and all panels converge
    on a final ``total_reward`` box.
    """
    c = Chart(2500, 1420, "Reward Decomposition", "Verifier-backed rewards remain inspectable through component columns and judge-friendly primary channels.")
    action = c.box(Rect(930, 210, 640, 92), "Candidate action", "selected legal candidate or fallback", palette=BLUE)
    checks = c.box(Rect(800, 360, 900, 94), "Verifier + Transition + Anti-Cheat + Uncertainty", palette=VIOLET, title_size=25)
    arrow(c.draw, [anchor(action, "bottom"), anchor(checks, "top")], color="#7c3aed")

    # (panel frame, channel name, one-line description, component names, palette)
    channel_specs = [
        (
            Rect(140, 575, 500, 455),
            "safety_legality",
            "legal and safe action choice",
            ["format compliance", "candidate alignment", "legality", "safety delta"],
            ROSE,
        ),
        (
            Rect(730, 575, 500, 455),
            "clinical_improvement",
            "clinical risk moves in the right direction",
            ["burden improvement", "disease stability"],
            TEAL,
        ),
        (
            Rect(1320, 575, 500, 455),
            "dosing_quality",
            "dose-sensitive decisions are handled",
            ["dosing quality"],
            AMBER,
        ),
        (
            Rect(1910, 575, 500, 455),
            "process_integrity",
            "process, uncertainty, and anti-cheat safeguards",
            ["abstention quality", "efficiency", "process fidelity", "explanation grounding", "anti-cheat", "uncertainty calibration"],
            VIOLET,
        ),
    ]
    blurb_font = font(21)
    for frame, name, blurb, parts, colours in channel_specs:
        c.group(frame, name, colours)
        wrapped = wrap_lines(c.draw, blurb, blurb_font, frame.w - 64)
        c.draw.multiline_text((frame.x + 32, frame.y + 86), "\n".join(wrapped), font=blurb_font, fill=MUTED, spacing=5)
        # Panels with more than four components use a tighter pill layout.
        if len(parts) > 4:
            first_offset, pill_h, pitch, label_px = 148, 42, 50, 17
        else:
            first_offset, pill_h, pitch, label_px = 155, 54, 66, 19
        for index, part in enumerate(parts):
            pill_top = frame.y + first_offset + index * pitch
            c.pill(Rect(frame.x + 44, pill_top, frame.w - 88, pill_h), part, colours, size=label_px)
        entry = (frame.x + frame.w // 2, 520)
        arrow(c.draw, [anchor(checks, "bottom"), entry, anchor(frame, "top")], color=colours[1], width=3)

    total = c.box(Rect(930, 1230, 640, 102), "total_reward", "clamped to 0.001 - 0.999", palette=BLUE, title_size=28)
    for frame, *_ in channel_specs:
        arrow(c.draw, [anchor(frame, "bottom"), anchor(total, "top")], color="#2563eb", width=3)
    return c.save("reward_decomposition")


def episode_state_machine() -> Path:
    """Render the episode state-machine chart and return the saved PNG path.

    State boxes are drawn first, then the transitions between them, and
    finally one pill per terminal reason with an arrow into the Done box.
    """
    c = Chart(2250, 1120, "Episode State Machine", "Terminal reasons are explicit, making rollouts auditable and reward hacking visible.")
    # (lookup key, frame, caption, palette), in drawing order.
    state_specs = (
        ("Start", Rect(100, 520, 190, 82), "Start", BLUE),
        ("Reset", Rect(390, 520, 190, 82), "Reset", BLUE),
        ("Observe", Rect(680, 520, 220, 82), "Observe", TEAL),
        ("Select", Rect(1020, 500, 260, 122), "Candidate Selection", TEAL),
        ("Verify", Rect(1420, 500, 240, 122), "Verification", VIOLET),
        ("Transition", Rect(1810, 395, 245, 90), "Transition", TEAL),
        ("Rollback", Rect(1810, 610, 245, 90), "Rollback", ROSE),
        ("Reward", Rect(1450, 820, 250, 96), "Reward Scoring", AMBER),
        ("Continue", Rect(980, 820, 250, 96), "Continue", SLATE),
        ("Done", Rect(1950, 820, 220, 96), "Done", BLUE),
    )
    nodes = {key: c.box(frame, caption, palette=colours) for key, frame, caption, colours in state_specs}

    # Main left-to-right path up to verification.
    happy_path = ("Start", "Reset", "Observe", "Select", "Verify")
    for current, following in zip(happy_path, happy_path[1:]):
        arrow(c.draw, [anchor(nodes[current], "right"), anchor(nodes[following], "left")], color="#475569")

    # Verification branches into a legal transition or a blocked rollback.
    verify_exit = anchor(nodes["Verify"], "right")
    arrow(c.draw, [verify_exit, anchor(nodes["Transition"], "left")], color="#0f766e", label="legal")
    arrow(c.draw, [verify_exit, (1725, 560), anchor(nodes["Rollback"], "left")], color="#e11d48", label="blocked")

    # Both outcomes pass through the same waypoint into reward scoring.
    reward_entry = anchor(nodes["Reward"], "right")
    for outcome in ("Transition", "Rollback"):
        arrow(c.draw, [anchor(nodes[outcome], "bottom"), (1930, 780), reward_entry], color="#b45309")

    arrow(c.draw, [anchor(nodes["Reward"], "left"), anchor(nodes["Continue"], "right")], color="#64748b", label="budget remains")
    arrow(c.draw, [anchor(nodes["Continue"], "top"), (1105, 690), (790, 690), anchor(nodes["Observe"], "bottom")], color="#64748b")

    # Terminal reasons: pills stacked 50 px apart, each pointing at the Done box.
    reasons = ["safe resolution", "review escalation", "exploit detected", "timeout", "budget exhausted"]
    done_entry = anchor(nodes["Done"], "left")
    for top, reason in zip(range(760, 760 + 50 * len(reasons), 50), reasons):
        c.pill(Rect(1735, top, 175, 36), reason, SLATE, size=16)
        arrow(c.draw, [(1910, top + 18), done_entry], color="#2563eb", width=2)
    return c.save("episode_state_machine")


def deployment_topology() -> Path:
    """Render the deployment topology chart and return the saved PNG path.

    Four panels are drawn: the local machine on the left, the product and
    training Spaces stacked in the middle, and the Hub on the right. Boxes
    are drawn before the arrows that connect them.
    """
    c = Chart(2400, 1380, "Deployment Topology", "Local services, public product Space, private training Space, and artifact exchange on Hugging Face Hub.")
    local = Rect(100, 245, 580, 830)
    product = Rect(810, 245, 600, 350)
    training = Rect(810, 725, 600, 350)
    hub = Rect(1540, 245, 760, 830)
    for rect, title, pal in [
        (local, "Local Developer Machine", BLUE),
        (product, "Public Product Space", TEAL),
        (training, "Private Training Space", VIOLET),
        (hub, "Hugging Face Hub", AMBER),
    ]:
        c.group(rect, title, pal)
    # Local machine panel: the repo box on top, with a 2 x 2 grid of boxes below it.
    repo = c.box(Rect(240, 365, 300, 86), "polyguard-rl repo", palette=BLUE, title_size=22)
    local_runtime = c.box(Rect(165, 545, 210, 82), "Local API", ":8200", palette=VIOLET, title_size=21)
    local_env = c.box(Rect(405, 545, 210, 82), "OpenEnv", ":8201", palette=AMBER, title_size=21)
    vite = c.box(Rect(165, 695, 210, 82), "Vite UI", ":5173", palette=BLUE, title_size=21)
    checks = c.box(Rect(405, 695, 210, 82), "Checks", "pytest / validate / gate", palette=SLATE, title_size=21)
    # Product Space panel: bundle box with two boxes beneath it.
    space_bundle = c.box(Rect(955, 365, 310, 84), "Product Docker Bundle", palette=TEAL, title_size=22)
    product_runtime = c.box(Rect(890, 500, 205, 76), "FastAPI Runtime", palette=TEAL, title_size=19)
    product_ui = c.box(Rect(1135, 500, 205, 76), "React Workbench", palette=TEAL, title_size=19)
    # Training Space panel: bundle box with two boxes beneath it.
    train_bundle = c.box(Rect(955, 845, 310, 84), "Training Docker Space", palette=VIOLET, title_size=22)
    runner = c.box(Rect(890, 980, 205, 76), "Gradio Runner", palette=VIOLET, title_size=19)
    gpu = c.box(Rect(1135, 980, 205, 76), "HF GPU A10G", palette=VIOLET, title_size=19)
    # Hub panel: two rows of two boxes, plus the local docs box below them.
    product_repo = c.box(Rect(1625, 360, 265, 86), "Product Space Repo", "polyguard-openenv", palette=AMBER, title_size=21)
    training_repo = c.box(Rect(1975, 360, 240, 86), "Training Space Repo", palette=AMBER, title_size=21)
    artifact_repo = c.box(Rect(1625, 610, 265, 86), "Artifact Repo", "adapters / reports", palette=AMBER, title_size=21)
    evidence_repo = c.box(Rect(1975, 610, 240, 86), "Evidence Space", palette=AMBER, title_size=21)
    docs = c.box(Rect(1780, 850, 275, 86), "Local docs/results", "pulled evidence", palette=SLATE, title_size=21)
    # The repo box fans out to every box in the local grid.
    for target in [local_runtime, local_env, vite, checks]:
        arrow(c.draw, [anchor(repo, "bottom"), anchor(target, "top")], color="#2563eb")
    # Deployment arrows from the repo box to each Space bundle; the training one bends at (745, 885).
    arrow(c.draw, [anchor(repo, "right"), anchor(space_bundle, "left")], color="#0f766e", label="deploy product")
    arrow(c.draw, [anchor(repo, "right"), (745, 885), anchor(train_bundle, "left")], color="#7c3aed", label="deploy training")
    arrow(c.draw, [anchor(space_bundle, "right"), anchor(product_repo, "left")], color="#0f766e")
    arrow(c.draw, [anchor(space_bundle, "bottom"), anchor(product_runtime, "top")], color="#0f766e")
    arrow(c.draw, [anchor(space_bundle, "bottom"), anchor(product_ui, "top")], color="#0f766e")
    arrow(c.draw, [anchor(train_bundle, "right"), anchor(training_repo, "left")], color="#7c3aed")
    arrow(c.draw, [anchor(train_bundle, "bottom"), anchor(runner, "top")], color="#7c3aed")
    arrow(c.draw, [anchor(runner, "right"), anchor(gpu, "left")], color="#7c3aed")
    # Artifact exchange: upload from the runner box, then onward to the evidence and docs boxes.
    arrow(c.draw, [anchor(runner, "right"), anchor(artifact_repo, "left")], color="#b45309", label="upload")
    arrow(c.draw, [anchor(artifact_repo, "right"), anchor(evidence_repo, "left")], color="#b45309")
    arrow(c.draw, [anchor(artifact_repo, "bottom"), anchor(docs, "top")], color="#64748b", label="pull")
    return c.save("deployment_topology")


def evidence_generation_flow() -> Path:
    """Draw the "Evidence Generation Flow" chart and return the result of ``Chart.save``.

    The chart is laid out left to right: the training runs box, the two
    run-output boxes, the artifact pull step, three evaluation stages, the
    charts box, and finally three reviewer-facing output boxes. Boxes and
    arrows are drawn in a fixed order, so any overlap is resolved the same way
    on every render.
    """
    chart = Chart(
        2300,
        980,
        "Evidence Generation Flow",
        "Training outputs are converted into reviewer-facing reports, plots, bundles, and README claims.",
    )

    def link(src, dst, color: str) -> None:
        # Every connector in this chart leaves a box's right edge and enters
        # the next box's left edge.
        arrow(chart.draw, [anchor(src, "right"), anchor(dst, "left")], color=color)

    # --- boxes, in left-to-right draw order ---
    training = chart.box(Rect(100, 435, 250, 96), "SFT / GRPO Runs", palette=VIOLET)
    run_reports = chart.box(Rect(465, 320, 260, 90), "Run Reports", palette=AMBER)
    artifacts = chart.box(
        Rect(465, 560, 260, 90),
        "Adapters + Merged Artifacts",
        palette=AMBER,
        title_size=22,
    )
    pull_step = chart.box(Rect(850, 435, 260, 96), "Pull Training Artifacts", palette=BLUE)
    stages = [
        chart.box(Rect(1250, top, 290, 90), title, palette=TEAL)
        for top, title in (
            (260, "Post-Save Inference"),
            (435, "Policy-Stack Ablations"),
            (610, "Benchmarks + Robustness"),
        )
    ]
    charts_box = chart.box(Rect(1655, 435, 210, 90), "Charts", palette=ROSE)
    sinks = [
        chart.box(Rect(1955, top, 260, 80), title, palette=SLATE, title_size=22)
        for top, title in (
            (320, "docs/results"),
            (455, "Submission Bundle"),
            (590, "README Claims"),
        )
    ]

    # --- connectors ---
    run_outputs = (run_reports, artifacts)
    for produced in run_outputs:
        link(training, produced, "#b45309")
    for produced in run_outputs:
        link(produced, pull_step, "#2563eb")
    for stage in stages:
        # Inbound and outbound arrows are interleaved per stage.
        link(pull_step, stage, "#0f766e")
        link(stage, charts_box, "#e11d48")
    for sink in sinks:
        link(charts_box, sink, "#64748b")

    return chart.save("evidence_generation_flow")


def frontend_runtime_surface() -> Path:
    """Draw the "Frontend Runtime Surface" chart and return the result of ``Chart.save``.

    Three grouped columns are rendered: the React pages on the left, the API
    endpoint cards in the middle, and the backend runtime stack on the right.
    Each page box is connected to the endpoint card at the same list position.
    """
    chart = Chart(
        2300,
        1350,
        "Frontend Runtime Surface",
        "React pages map to concrete FastAPI endpoints used by the Patient Workbench and supporting views.",
    )

    # Column backdrops. Only the API column rectangle is reused later, as the
    # start point of the "fetchJson" arrow.
    api_column = Rect(780, 245, 720, 965)
    chart.group(Rect(100, 245, 560, 965), "React App Pages", BLUE)
    chart.group(api_column, "API Endpoints", TEAL)
    chart.group(Rect(1640, 245, 560, 965), "Backend Runtime", VIOLET)

    # --- left column: app box, page boxes, footer pill ---
    react_app = chart.box(Rect(230, 365, 300, 90), "React App", palette=BLUE)
    page_boxes = [
        chart.box(Rect(190, top, 340, 78), title, palette=BLUE, title_size=21)
        for top, title in (
            (525, "Patient Workbench"),
            (665, "Policy Lab"),
            (805, "Safety + Dosing Views"),
            (945, "Replay + Training Views"),
        )
    ]
    chart.pill(Rect(190, 1090, 340, 48), "shared fetchJson client", BLUE, size=18)

    # One vertical chain: app -> first page -> ... -> last page.
    page_chain = [react_app, *page_boxes]
    for upper, lower in zip(page_chain, page_chain[1:]):
        arrow(
            chart.draw,
            [anchor(upper, "bottom"), anchor(lower, "top")],
            color="#2563eb",
            width=3,
        )

    # --- middle column: endpoint cards ---
    # Each spec is (top, height, title, routes, body font size).
    endpoint_specs = (
        (485, 116, "Session + Step", "POST /env/reset\nPOST /env/step_candidate", 18),
        (
            655,
            132,
            "Policy + Safety",
            "POST /agents/orchestrate\nGET /env/reward_breakdown\nGET /policy/model_status",
            17,
        ),
        (850, 116, "Evaluation", "POST /eval/run_baselines\nPOST /eval/run_dosing", 18),
        (1035, 116, "Trace + Metrics", "GET /env/trace\nGET /metrics/training", 18),
    )
    endpoint_boxes = [
        chart.box(
            Rect(900, top, 470, height),
            title,
            routes,
            palette=TEAL,
            title_size=22,
            body_size=body_size,
        )
        for top, height, title, routes, body_size in endpoint_specs
    ]
    for page_box, endpoint_box in zip(page_boxes, endpoint_boxes):
        arrow(
            chart.draw,
            [anchor(page_box, "right"), anchor(endpoint_box, "left")],
            color="#0f766e",
            width=3,
        )
    chart.pill(Rect(945, 1170, 380, 48), "all calls use API_BASE", TEAL, size=18)

    # --- right column: backend stack, top to bottom ---
    backend_boxes = [
        chart.box(Rect(1785, top, 270, 96), title, palette=palette)
        for top, title, palette in (
            (395, "FastAPI API", VIOLET),
            (610, "PolyGuardEnv", AMBER),
            (825, "Policy Runtime", MINT),
            (1040, "Eval + Metrics", SLATE),
        )
    ]
    arrow(
        chart.draw,
        [anchor(api_column, "right"), anchor(backend_boxes[0], "left")],
        color="#7c3aed",
        width=4,
        label="fetchJson",
    )
    # Each downward hop in the stack has its own colour.
    hop_colors = ("#b45309", "#0891b2", "#64748b")
    for upper, lower, hop_color in zip(backend_boxes, backend_boxes[1:], hop_colors):
        arrow(chart.draw, [anchor(upper, "bottom"), anchor(lower, "top")], color=hop_color)

    return chart.save("frontend_runtime_surface")


# Diagram renderers that main() runs, in this order. Each entry is called with
# no arguments, and its return value is treated as the path of the written file.
RENDERERS = [
    system_architecture,
    runtime_step_flow,
    data_training_pipeline,
    multi_agent_orchestration,
    reward_decomposition,
    episode_state_machine,
    deployment_topology,
    evidence_generation_flow,
    frontend_runtime_surface,
]


def main() -> None:
    """Render every diagram in ``RENDERERS`` and list the written files.

    The output directory is created if needed. Once all renderers have
    finished, any ``*.png`` in ``OUT_DIR`` that was not produced by this run is
    removed, so on success the directory holds only the current diagrams.
    Each rendered path is then printed relative to ``ROOT``.

    Raises:
        Whatever a renderer raises. In that case no existing PNG is deleted.
    """
    OUT_DIR.mkdir(parents=True, exist_ok=True)

    # Render before pruning: if a renderer fails (for example because the
    # hard-coded font files are missing), the previously generated diagrams
    # are still on disk instead of having been deleted up front.
    rendered = [renderer() for renderer in RENDERERS]

    # samefile() compares the underlying files rather than path spellings, so
    # a freshly written diagram is never mistaken for a stale one.
    for existing in OUT_DIR.glob("*.png"):
        if not any(existing.samefile(path) for path in rendered):
            existing.unlink()

    print("rendered_diagrams:")
    for path in rendered:
        print(path.relative_to(ROOT))


# Run the renderers only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()