%% Source: Cyber_analyst-round1/assets/architecture_diagram.mmd
%% Last change: "diagrams updated" (commit 5809a6c) by Humanlearning
%%{init: {"theme": "base", "themeCSS": "svg { background: #ffffff; }", "themeVariables": {"background": "#ffffff", "mainBkg": "#ffffff", "edgeLabelBackground": "#ffffff", "fontFamily": "Arial, Helvetica, sans-serif", "primaryTextColor": "#111827", "lineColor": "#0f172a", "clusterBkg": "#ffffff", "clusterBorder": "#cbd5e1"}, "flowchart": {"htmlLabels": false, "curve": "basis", "nodeSpacing": 60, "rankSpacing": 80, "padding": 24}}}%%
flowchart LR
%% White-background theme; htmlLabels is disabled, so node labels use "\n"
%% escapes for line breaks rather than <br/> tags.
%% One colour palette per subsystem; nodes are bound to these palettes by the
%% `class` statements at the end of the file.
classDef factory fill:#eff6ff,stroke:#2563eb,stroke-width:2px,color:#111827;
classDef runtime fill:#ecfdf5,stroke:#059669,stroke-width:2px,color:#111827;
classDef agent fill:#fff7ed,stroke:#ea580c,stroke-width:2px,color:#111827;
classDef training fill:#f5f3ff,stroke:#7c3aed,stroke-width:2px,color:#111827;
classDef feedback fill:#f1f5f9,stroke:#64748b,stroke-width:2px,color:#111827;
%% Offline authoring pipeline: LLM-drafted scenarios are mutated, compiled,
%% verified, then stored in a versioned cache (F6) consumed by the runtime.
subgraph Factory["Scenario factory\noffline authoring"]
direction TB
F1["LLM author\nconfig-driven drafts"] --> F2["ScenarioSpec\npolicy + bug target"]
F2 --> F3["A01 mutator\nFastAPI variants + traps"]
F3 --> F4["Compiler\nexecutable app bundle"]
F4 --> F5["Verifier\nvisible + hidden tests"]
F5 --> F6["Versioned cache\nsplit + difficulty + hash"]
end
%% Episode loop: R3 -> R4 -> R5 -> R6 -> R7 -> R3 is the inner step cycle;
%% R8 exposes observations and logs to the agent and the feedback loop.
subgraph Runtime["OpenEnv runtime\ncache-backed episodes"]
direction TB
R1["reset(seed)\nload cached bundle"] --> R2["Curriculum sampler\nvalidated slice"]
R2 --> R3["Episode state\nphase + history + diff"]
R3 --> R4["Typed tools\ninspect, request, patch"]
R4 --> R5["App sandbox\ncloned workspace"]
R5 --> R6["Verifier\nsecurity + regression"]
R6 --> R7["Reward engine\ncomponents + penalties"]
%% Reward feeds back into episode state, closing the step cycle.
R7 --> R3
R3 --> R8["API + logger\n/ws, /step, artifacts"]
end
%% Agent side: observe (A1), reason (A2), emit exactly one JSON action (A3).
subgraph Agent["Single LLM agent"]
direction TB
A1["Parse observation"] --> A2["Reason over\npolicy + code"]
A2 --> A3["Emit one\nJSON action"]
end
%% Training pipeline: rollouts -> GRPO/LoRA -> metrics -> eval -> demo artifacts.
subgraph Training["Training, eval, demo"]
direction TB
T1["Parallel rollouts\nfast cached reset"] --> T2["TRL GRPO + LoRA"]
T2 --> T3["Trackio metrics\nreward + pass rates"]
T3 --> T4["Held-out eval\nbaseline vs trained"]
T4 --> T5["Demo artifacts\nrollouts + summaries"]
end
%% Feedback loop: episode logs drive failure analysis, which retunes sampling
%% and queues new scenario-generation jobs back at the factory.
subgraph Feedback["Feedback loop"]
direction LR
B1["Episode logs"] --> B2["Failure analysis"]
B2 --> B3["Sampling weights\nand new jobs"]
end
%% Cross-subsystem links. Solid = per-step data flow, thick = bulk transfer,
%% dotted = periodic/asynchronous updates. Labels use the |text| form.
F6 ==>|cached bundle| R1
R8 -->|observation| A1
A3 -->|JSON action| R4
R7 -->|terminal reward| T1
T2 -.->|adapter checkpoint| A2
R8 -->|episode logs| B1
B3 -.->|cache refresh| F1
%% Bind each node to its subsystem palette (classDefs declared at the top).
class F1,F2,F3,F4,F5,F6 factory;
class R1,R2,R3,R4,R5,R6,R7,R8 runtime;
class A1,A2,A3 agent;
class T1,T2,T3,T4,T5 training;
class B1,B2,B3 feedback;
%% Uniform stroke for every edge; `default` applies to all links at once.
linkStyle default stroke:#0f172a,stroke-width:2px;