%% Source: Cyber_analyst-round1/assets/architecture_diagram.mmd
%% Last change: "diagrams updated" (commit 5809a6c) by Humanlearning
%%{init: {"theme": "base", "themeCSS": "svg { background: #ffffff; }", "themeVariables": {"background": "#ffffff", "mainBkg": "#ffffff", "edgeLabelBackground": "#ffffff", "fontFamily": "Arial, Helvetica, sans-serif", "primaryTextColor": "#111827", "lineColor": "#0f172a", "clusterBkg": "#ffffff", "clusterBorder": "#cbd5e1"}, "flowchart": {"htmlLabels": false, "curve": "basis", "nodeSpacing": 60, "rankSpacing": 80, "padding": 24}}}%%
flowchart LR
%% White-background theme; htmlLabels is disabled, so node labels use "\n"
%% escapes for line breaks rather than <br/> tags.
%% One colour palette per subsystem; nodes are bound to these palettes by the
%% `class` statements at the end of the file.
classDef factory fill:#eff6ff,stroke:#2563eb,stroke-width:2px,color:#111827;
classDef runtime fill:#ecfdf5,stroke:#059669,stroke-width:2px,color:#111827;
classDef agent fill:#fff7ed,stroke:#ea580c,stroke-width:2px,color:#111827;
classDef training fill:#f5f3ff,stroke:#7c3aed,stroke-width:2px,color:#111827;
classDef feedback fill:#f1f5f9,stroke:#64748b,stroke-width:2px,color:#111827;
%% Offline authoring pipeline: LLM-drafted scenarios are mutated, compiled,
%% verified, then stored in a versioned cache (F6) consumed by the runtime.
subgraph Factory["Scenario factory\noffline authoring"]
direction TB
F1["LLM author\nconfig-driven drafts"] --> F2["ScenarioSpec\npolicy + bug target"]
F2 --> F3["A01 mutator\nFastAPI variants + traps"]
F3 --> F4["Compiler\nexecutable app bundle"]
F4 --> F5["Verifier\nvisible + hidden tests"]
F5 --> F6["Versioned cache\nsplit + difficulty + hash"]
end
%% Episode loop: R3 -> R4 -> R5 -> R6 -> R7 -> R3 is the inner step cycle;
%% R8 exposes observations and logs to the agent and the feedback loop.
subgraph Runtime["OpenEnv runtime\ncache-backed episodes"]
direction TB
R1["reset(seed)\nload cached bundle"] --> R2["Curriculum sampler\nvalidated slice"]
R2 --> R3["Episode state\nphase + history + diff"]
R3 --> R4["Typed tools\ninspect, request, patch"]
R4 --> R5["App sandbox\ncloned workspace"]
R5 --> R6["Verifier\nsecurity + regression"]
R6 --> R7["Reward engine\ncomponents + penalties"]
%% Reward feeds back into episode state, closing the step cycle.
R7 --> R3
R3 --> R8["API + logger\n/ws, /step, artifacts"]
end
%% Agent side: observe (A1), reason (A2), emit exactly one JSON action (A3).
subgraph Agent["Single LLM agent"]
direction TB
A1["Parse observation"] --> A2["Reason over\npolicy + code"]
A2 --> A3["Emit one\nJSON action"]
end
%% Training pipeline: rollouts -> GRPO/LoRA -> metrics -> eval -> demo artifacts.
subgraph Training["Training, eval, demo"]
direction TB
T1["Parallel rollouts\nfast cached reset"] --> T2["TRL GRPO + LoRA"]
T2 --> T3["Trackio metrics\nreward + pass rates"]
T3 --> T4["Held-out eval\nbaseline vs trained"]
T4 --> T5["Demo artifacts\nrollouts + summaries"]
end
%% Feedback loop: episode logs drive failure analysis, which retunes sampling
%% and queues new scenario-generation jobs back at the factory.
subgraph Feedback["Feedback loop"]
direction LR
B1["Episode logs"] --> B2["Failure analysis"]
B2 --> B3["Sampling weights\nand new jobs"]
end
%% Cross-subsystem links. Solid = per-step data flow, thick = bulk transfer,
%% dotted = periodic/asynchronous updates. Labels use the |text| form.
F6 ==>|cached bundle| R1
R8 -->|observation| A1
A3 -->|JSON action| R4
R7 -->|terminal reward| T1
T2 -.->|adapter checkpoint| A2
R8 -->|episode logs| B1
B3 -.->|cache refresh| F1
%% Bind each node to its subsystem palette (classDefs declared at the top).
class F1,F2,F3,F4,F5,F6 factory;
class R1,R2,R3,R4,R5,R6,R7,R8 runtime;
class A1,A2,A3 agent;
class T1,T2,T3,T4,T5 training;
class B1,B2,B3 feedback;
%% Uniform stroke for every edge; `default` applies to all links at once.
linkStyle default stroke:#0f172a,stroke-width:2px;