%%{init: {"theme": "base", "themeCSS": "svg { background: #ffffff; }", "themeVariables": {"background": "#ffffff", "mainBkg": "#ffffff", "edgeLabelBackground": "#ffffff", "fontFamily": "Arial, Helvetica, sans-serif", "primaryTextColor": "#111827", "lineColor": "#0f172a", "clusterBkg": "#ffffff", "clusterBorder": "#cbd5e1"}, "flowchart": {"htmlLabels": false, "curve": "basis", "nodeSpacing": 58, "rankSpacing": 70, "padding": 24}}}%%
%% Overview of one training run: cache preparation and baseline eval,
%% the GRPO training loop wrapped around a single OpenEnv episode,
%% the post-update decision branches, and the final held-out comparison.
%% Each statement sits on its own line; Mermaid needs a newline or a
%% semicolon between flowchart statements.
flowchart TD
    %% Style classes, one per group of nodes (assigned at the bottom).
    classDef setup fill:#eff6ff,stroke:#2563eb,stroke-width:2px,color:#111827;
    classDef episode fill:#ecfdf5,stroke:#059669,stroke-width:2px,color:#111827;
    classDef train fill:#f5f3ff,stroke:#7c3aed,stroke-width:2px,color:#111827;
    classDef adapt fill:#fff7ed,stroke:#ea580c,stroke-width:2px,color:#111827;
    classDef artifact fill:#f1f5f9,stroke:#64748b,stroke-width:2px,color:#111827;

    %% Setup chain: start, cache, baseline eval, then into the training loop.
    Start["Start run\nbase model + config"] --> Cache["Prepare cache\ntrain / validation / hidden_eval"]
    Cache --> Require["Modal cache mode\nrequire"]
    Require --> Baseline["Baseline eval\nscripted or model rollouts"]
    Baseline --> TrainLoop["GRPO training loop"]

    %% One episode: observe / prompt / generate / step repeats until done,
    %% then the terminal verifier feeds the reward components.
    subgraph Episode["One OpenEnv episode"]
        direction TB
        Reset["reset(seed)\nload cached app + policy"] --> Observe["Observation\nphase, hints, tools"]
        Observe --> Prompt["Build prompt\nJSON action only"]
        Prompt --> Generate["Model generates\none action"]
        Generate --> Step["step(action)\nphase gate + tool"]
        Step --> Done{"done?"}
        Done -- no --> Observe
        Done -- yes --> Verify["Terminal verifier\nsecurity + regression + anti-cheat"]
        Verify --> Rewards["Reward components\ndiscovery, security, regression, safety"]
    end

    %% The training loop enters the episode at reset; rewards drive the update.
    TrainLoop --> Reset
    Rewards --> Update["GRPO update\nLoRA checkpoint"]
    Update --> Metrics["Trackio logging\nrewards, pass rates, latency"]
    Metrics --> Decision{"next step?"}

    %% Decision branches: three lead back into training (directly, via the
    %% curriculum controller, or via a cache refresh); "final" leaves the loop.
    Decision -- continue --> TrainLoop
    Decision -- rebalance --> Curriculum["Curriculum controller\nsampling weights"]
    Curriculum --> TrainLoop
    Decision -- weak spot --> Refresh["Async cache refresh\nnew validated bundles"]
    Refresh --> Cache
    Decision -- final --> Heldout["Held-out eval\nunseen seeds and layouts"]
    Heldout --> Compare["Before/after summary\nsuccess + reward lift"]
    Compare --> Artifacts["Saved artifacts\noutputs/evals + outputs/rollouts"]

    %% Assign the style classes declared above.
    class Start,Cache,Require,Baseline setup;
    class Reset,Observe,Prompt,Generate,Step,Done,Verify,Rewards episode;
    class TrainLoop,Update,Metrics,Heldout,Compare train;
    class Decision,Curriculum,Refresh adapt;
    class Artifacts artifact;

    %% Uniform edge styling for every link.
    linkStyle default stroke:#0f172a,stroke-width:2px;