Spaces:
Sleeping
Sleeping
| %%{init: {"theme": "base", "themeCSS": "svg { background: #ffffff; }", "themeVariables": {"background": "#ffffff", "mainBkg": "#ffffff", "edgeLabelBackground": "#ffffff", "fontFamily": "Arial, Helvetica, sans-serif", "primaryTextColor": "#111827", "lineColor": "#0f172a", "clusterBkg": "#ffffff", "clusterBorder": "#cbd5e1"}, "flowchart": {"htmlLabels": false, "curve": "basis", "nodeSpacing": 60, "rankSpacing": 80, "padding": 24}}}%% | |
| flowchart LR | |
| %% The init directive above selects the base theme, forces white backgrounds, sets an Arial font stack, | |
| %% turns htmlLabels off, and fixes curve style, node spacing, rank spacing and padding for the flowchart. | |
| %% Colour classes: one per subgraph. Each sets fill and stroke colours with a 2px stroke and #111827 text. | |
| %% They are attached to nodes by the class statements near the end of the diagram. | |
| classDef factory fill:#eff6ff,stroke:#2563eb,stroke-width:2px,color:#111827; | |
| classDef runtime fill:#ecfdf5,stroke:#059669,stroke-width:2px,color:#111827; | |
| classDef agent fill:#fff7ed,stroke:#ea580c,stroke-width:2px,color:#111827; | |
| classDef training fill:#f5f3ff,stroke:#7c3aed,stroke-width:2px,color:#111827; | |
| classDef feedback fill:#f1f5f9,stroke:#64748b,stroke-width:2px,color:#111827; | |
| %% Factory: a single chain of six stages, F1 through F6, from the LLM author to the versioned cache. | |
| %% F6 is the node that hands off to Runtime, and F1 is the target of the cache refresh link from Feedback. | |
| subgraph Factory["Scenario factory\noffline authoring"] | |
| direction TB | |
| F1["LLM author\nconfig-driven drafts"] --> F2["ScenarioSpec\npolicy + bug target"] | |
| F2 --> F3["A01 mutator\nFastAPI variants + traps"] | |
| F3 --> F4["Compiler\nexecutable app bundle"] | |
| F4 --> F5["Verifier\nvisible + hidden tests"] | |
| F5 --> F6["Versioned cache\nsplit + difficulty + hash"] | |
| end | |
| %% Runtime: R1 through R7 form a chain from reset to the reward engine. | |
| %% R7 links back to R3, closing a cycle through episode state, and R3 also feeds R8 (API + logger). | |
| subgraph Runtime["OpenEnv runtime\ncache-backed episodes"] | |
| direction TB | |
| R1["reset(seed)\nload cached bundle"] --> R2["Curriculum sampler\nvalidated slice"] | |
| R2 --> R3["Episode state\nphase + history + diff"] | |
| R3 --> R4["Typed tools\ninspect, request, patch"] | |
| R4 --> R5["App sandbox\ncloned workspace"] | |
| R5 --> R6["Verifier\nsecurity + regression"] | |
| R6 --> R7["Reward engine\ncomponents + penalties"] | |
| R7 --> R3 | |
| R3 --> R8["API + logger\n/ws, /step, artifacts"] | |
| end | |
| %% Agent: three sequential steps, A1 to A3, from parsing an observation to emitting one JSON action. | |
| subgraph Agent["Single LLM agent"] | |
| direction TB | |
| A1["Parse observation"] --> A2["Reason over\npolicy + code"] | |
| A2 --> A3["Emit one\nJSON action"] | |
| end | |
| %% Training: a single chain T1 to T5, from parallel rollouts through GRPO + LoRA, metrics and held-out eval to demo artifacts. | |
| subgraph Training["Training, eval, demo"] | |
| direction TB | |
| T1["Parallel rollouts\nfast cached reset"] --> T2["TRL GRPO + LoRA"] | |
| T2 --> T3["Trackio metrics\nreward + pass rates"] | |
| T3 --> T4["Held-out eval\nbaseline vs trained"] | |
| T4 --> T5["Demo artifacts\nrollouts + summaries"] | |
| end | |
| %% Feedback: B1 to B3 turn episode logs into failure analysis, then into sampling weights and new jobs. | |
| %% This is the only subgraph that declares direction LR; the others declare TB. | |
| subgraph Feedback["Feedback loop"] | |
| direction LR | |
| B1["Episode logs"] --> B2["Failure analysis"] | |
| B2 --> B3["Sampling weights\nand new jobs"] | |
| end | |
| %% Cross-subgraph links. Three link forms are used: == text ==> for the Factory to Runtime handoff, | |
| %% -- text --> for the per-step and per-episode data flows, and -. text .-> for the two links | |
| %% labelled adapter checkpoint and cache refresh. | |
| %% NOTE(review): Mermaid docs state that a subgraph direction is ignored once any of its nodes links | |
| %% outside that subgraph. Every subgraph has such a link below, so confirm the direction TB lines take effect. | |
| F6 == cached bundle ==> R1 | |
| R8 -- observation --> A1 | |
| A3 -- JSON action --> R4 | |
| R7 -- terminal reward --> T1 | |
| T2 -. adapter checkpoint .-> A2 | |
| R8 -- episode logs --> B1 | |
| B3 -. cache refresh .-> F1 | |
| %% Attach each colour class to every node of the matching subgraph, by node-id prefix F, R, A, T, B. | |
| class F1,F2,F3,F4,F5,F6 factory; | |
| class R1,R2,R3,R4,R5,R6,R7,R8 runtime; | |
| class A1,A2,A3 agent; | |
| class T1,T2,T3,T4,T5 training; | |
| class B1,B2,B3 feedback; | |
| %% One shared stroke for every link; the colour matches lineColor in the init directive. | |
| linkStyle default stroke:#0f172a,stroke-width:2px; | |