%%{init: {"theme": "base", "themeCSS": "svg { background: #ffffff; }", "themeVariables": {"background": "#ffffff", "mainBkg": "#ffffff", "edgeLabelBackground": "#ffffff", "fontFamily": "Arial, Helvetica, sans-serif", "primaryTextColor": "#111827", "lineColor": "#0f172a", "clusterBkg": "#ffffff", "clusterBorder": "#cbd5e1"}, "flowchart": {"htmlLabels": false, "curve": "basis", "nodeSpacing": 60, "rankSpacing": 80, "padding": 24}}}%%
flowchart LR

    %% Node color classes, one per pipeline stage.
    classDef factory fill:#eff6ff,stroke:#2563eb,stroke-width:2px,color:#111827;
    classDef runtime fill:#ecfdf5,stroke:#059669,stroke-width:2px,color:#111827;
    classDef agent fill:#fff7ed,stroke:#ea580c,stroke-width:2px,color:#111827;
    classDef training fill:#f5f3ff,stroke:#7c3aed,stroke-width:2px,color:#111827;
    classDef feedback fill:#f1f5f9,stroke:#64748b,stroke-width:2px,color:#111827;

    %% Offline authoring pipeline: spec -> mutate -> compile -> verify -> cache.
    subgraph Factory["Scenario factory\noffline authoring"]
        direction TB
        F1["LLM author\nconfig-driven drafts"] --> F2["ScenarioSpec\npolicy + bug target"]
        F2 --> F3["A01 mutator\nFastAPI variants + traps"]
        F3 --> F4["Compiler\nexecutable app bundle"]
        F4 --> F5["Verifier\nvisible + hidden tests"]
        F5 --> F6["Versioned cache\nsplit + difficulty + hash"]
    end

    %% Episode loop: reset -> sample -> act via tools -> verify -> reward -> back to state.
    subgraph Runtime["OpenEnv runtime\ncache-backed episodes"]
        direction TB
        R1["reset(seed)\nload cached bundle"] --> R2["Curriculum sampler\nvalidated slice"]
        R2 --> R3["Episode state\nphase + history + diff"]
        R3 --> R4["Typed tools\ninspect, request, patch"]
        R4 --> R5["App sandbox\ncloned workspace"]
        R5 --> R6["Verifier\nsecurity + regression"]
        R6 --> R7["Reward engine\ncomponents + penalties"]
        R7 --> R3
        R3 --> R8["API + logger\n/ws, /step, artifacts"]
    end

    subgraph Agent["Single LLM agent"]
        direction TB
        A1["Parse observation"] --> A2["Reason over\npolicy + code"]
        A2 --> A3["Emit one\nJSON action"]
    end

    subgraph Training["Training, eval, demo"]
        direction TB
        T1["Parallel rollouts\nfast cached reset"] --> T2["TRL GRPO + LoRA"]
        T2 --> T3["Trackio metrics\nreward + pass rates"]
        T3 --> T4["Held-out eval\nbaseline vs trained"]
        T4 --> T5["Demo artifacts\nrollouts + summaries"]
    end

    subgraph Feedback["Feedback loop"]
        direction LR
        B1["Episode logs"] --> B2["Failure analysis"]
        B2 --> B3["Sampling weights\nand new jobs"]
    end

    %% Cross-subgraph links: thick = data hand-off, dotted = periodic refresh.
    F6 == cached bundle ==> R1
    R8 -- observation --> A1
    A3 -- JSON action --> R4
    R7 -- terminal reward --> T1
    T2 -. adapter checkpoint .-> A2
    R8 -- episode logs --> B1
    B3 -. cache refresh .-> F1

    class F1,F2,F3,F4,F5,F6 factory;
    class R1,R2,R3,R4,R5,R6,R7,R8 runtime;
    class A1,A2,A3 agent;
    class T1,T2,T3,T4,T5 training;
    class B1,B2,B3 feedback;

    linkStyle default stroke:#0f172a,stroke-width:2px;