%%{init: {"theme": "base", "themeCSS": "svg { background: #ffffff; }", "themeVariables": {"background": "#ffffff", "mainBkg": "#ffffff", "edgeLabelBackground": "#ffffff", "fontFamily": "Arial, Helvetica, sans-serif", "primaryTextColor": "#111827", "lineColor": "#0f172a", "clusterBkg": "#ffffff", "clusterBorder": "#cbd5e1"}, "flowchart": {"htmlLabels": false, "curve": "basis", "nodeSpacing": 60, "rankSpacing": 80, "padding": 24}}}%%
flowchart LR

    %% Node color classes, one per pipeline stage.
    classDef factory fill:#eff6ff,stroke:#2563eb,stroke-width:2px,color:#111827;
    classDef runtime fill:#ecfdf5,stroke:#059669,stroke-width:2px,color:#111827;
    classDef agent fill:#fff7ed,stroke:#ea580c,stroke-width:2px,color:#111827;
    classDef training fill:#f5f3ff,stroke:#7c3aed,stroke-width:2px,color:#111827;
    classDef feedback fill:#f1f5f9,stroke:#64748b,stroke-width:2px,color:#111827;

    %% Offline authoring pipeline: spec -> mutate -> compile -> verify -> cache.
    subgraph Factory["Scenario factory\noffline authoring"]
        direction TB
        F1["LLM author\nconfig-driven drafts"] --> F2["ScenarioSpec\npolicy + bug target"]
        F2 --> F3["A01 mutator\nFastAPI variants + traps"]
        F3 --> F4["Compiler\nexecutable app bundle"]
        F4 --> F5["Verifier\nvisible + hidden tests"]
        F5 --> F6["Versioned cache\nsplit + difficulty + hash"]
    end

    %% Episode loop: reset -> sample -> act via tools -> verify -> reward -> back to state.
    subgraph Runtime["OpenEnv runtime\ncache-backed episodes"]
        direction TB
        R1["reset(seed)\nload cached bundle"] --> R2["Curriculum sampler\nvalidated slice"]
        R2 --> R3["Episode state\nphase + history + diff"]
        R3 --> R4["Typed tools\ninspect, request, patch"]
        R4 --> R5["App sandbox\ncloned workspace"]
        R5 --> R6["Verifier\nsecurity + regression"]
        R6 --> R7["Reward engine\ncomponents + penalties"]
        R7 --> R3
        R3 --> R8["API + logger\n/ws, /step, artifacts"]
    end

    subgraph Agent["Single LLM agent"]
        direction TB
        A1["Parse observation"] --> A2["Reason over\npolicy + code"]
        A2 --> A3["Emit one\nJSON action"]
    end

    subgraph Training["Training, eval, demo"]
        direction TB
        T1["Parallel rollouts\nfast cached reset"] --> T2["TRL GRPO + LoRA"]
        T2 --> T3["Trackio metrics\nreward + pass rates"]
        T3 --> T4["Held-out eval\nbaseline vs trained"]
        T4 --> T5["Demo artifacts\nrollouts + summaries"]
    end

    subgraph Feedback["Feedback loop"]
        direction LR
        B1["Episode logs"] --> B2["Failure analysis"]
        B2 --> B3["Sampling weights\nand new jobs"]
    end

    %% Cross-subgraph links: thick = data hand-off, dotted = periodic refresh.
    F6 == cached bundle ==> R1
    R8 -- observation --> A1
    A3 -- JSON action --> R4
    R7 -- terminal reward --> T1
    T2 -. adapter checkpoint .-> A2
    R8 -- episode logs --> B1
    B3 -. cache refresh .-> F1

    class F1,F2,F3,F4,F5,F6 factory;
    class R1,R2,R3,R4,R5,R6,R7,R8 runtime;
    class A1,A2,A3 agent;
    class T1,T2,T3,T4,T5 training;
    class B1,B2,B3 feedback;

    linkStyle default stroke:#0f172a,stroke-width:2px;