Spaces:
Sleeping
Sleeping
Commit ·
6abc8c5
1
Parent(s): 4e663d8
feat: implement RL environment server with training infrastructure and Modal integration
Browse files- 01_ARCHITECTURE.md +97 -86
- README.md +21 -5
- assets/architecture_diagram.mmd +35 -30
- assets/architecture_diagram.svg +73 -78
- models.py +9 -0
- pyproject.toml +11 -0
- rewards.py +24 -2
- scenario_compiler.py +18 -58
- scripts/modal_ephemeral_train.py +2 -0
- scripts/modal_train_grpo.py +2 -0
- scripts/track_pytest.py +1 -0
- server/CyberSecurity_OWASP_environment.py +79 -126
- server/__init__.py +11 -1
- server/action_tools.py +73 -0
- server/adversarial_designer.py +59 -0
- server/app_sandbox.py +139 -0
- server/authz_oracle.py +92 -0
- server/curriculum.py +99 -0
- server/episode_logger.py +66 -0
- server/reward_engine.py +10 -31
- server/scenario_factory.py +134 -0
- server/verifier.py +81 -0
- tests/test_closed_loop_runtime.py +94 -0
- tests/test_web_interface.py +1 -1
- training/rollout.py +15 -0
01_ARCHITECTURE.md
CHANGED
|
@@ -22,64 +22,56 @@ Editable source: `assets/architecture_diagram.mmd`
|
|
| 22 |
|
| 23 |
```mermaid
|
| 24 |
flowchart TB
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
A2 --> A4
|
| 36 |
-
A3 --> A4
|
| 37 |
-
A5 --> A4
|
| 38 |
end
|
| 39 |
|
| 40 |
-
%% =========================
|
| 41 |
-
%% OpenEnv Runtime
|
| 42 |
-
%% =========================
|
| 43 |
subgraph B[CyberSecurity_OWASP OpenEnv Server]
|
| 44 |
-
B1[reset\(\)\nselect
|
| 45 |
-
B2[
|
| 46 |
-
B3[
|
| 47 |
-
B4[
|
| 48 |
-
B5[
|
| 49 |
-
B6[
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
B4 --> B6
|
|
|
|
|
|
|
| 55 |
end
|
| 56 |
|
| 57 |
-
%% =========================
|
| 58 |
-
%% Agent + Training
|
| 59 |
-
%% =========================
|
| 60 |
subgraph C[Single LLM Agent]
|
| 61 |
C1[Observation Parser]
|
| 62 |
-
C2[
|
| 63 |
-
C3[
|
| 64 |
C1 --> C2 --> C3
|
| 65 |
end
|
| 66 |
|
| 67 |
subgraph D[Training + Evaluation]
|
| 68 |
-
D1[Rollout Loop\nreset → step* →
|
| 69 |
-
D2[
|
| 70 |
-
D3[Trackio Metrics\nreward curves, pass rates,
|
| 71 |
-
D4[Held-out
|
| 72 |
-
D5[Demo Artifacts\nbefore/after traces
|
| 73 |
-
D1 --> D2 --> D3
|
| 74 |
-
D3 --> D4 --> D5
|
| 75 |
end
|
| 76 |
|
| 77 |
-
|
| 78 |
C3 -->|typed action| B3
|
| 79 |
B3 -->|observation + reward + done| C1
|
| 80 |
-
|
| 81 |
D2 --> C1
|
| 82 |
-
|
| 83 |
```
|
| 84 |
|
| 85 |
## 3. Component responsibilities
|
|
@@ -113,6 +105,13 @@ The scenario compiler is the main anti-overfitting mechanism. It should vary:
|
|
| 113 |
- visible test coverage;
|
| 114 |
- hidden invariant seeds.
|
| 115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
### 3.2 Policy Graph Generator
|
| 117 |
|
| 118 |
The policy graph is the ground truth for intended behavior.
|
|
@@ -222,16 +221,19 @@ Observations should be compact and structured.
|
|
| 222 |
```python
|
| 223 |
@dataclass
|
| 224 |
class CyberSecurityOWASPObservation(Observation):
|
|
|
|
| 225 |
message: str
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
|
|
|
|
|
|
| 231 |
done_reason: str | None = None
|
| 232 |
```
|
| 233 |
|
| 234 |
-
|
| 235 |
|
| 236 |
### 3.7 State schema
|
| 237 |
|
|
@@ -241,17 +243,18 @@ State should support debugging and training analytics.
|
|
| 241 |
@dataclass
|
| 242 |
class CyberSecurityOWASPState(State):
|
| 243 |
episode_id: str
|
| 244 |
-
|
| 245 |
-
split: Literal["train", "validation", "
|
| 246 |
step_count: int = 0
|
| 247 |
-
max_steps: int =
|
|
|
|
| 248 |
scenario_family: str = ""
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
accumulated_reward: float = 0.0
|
| 256 |
```
|
| 257 |
|
|
@@ -259,9 +262,10 @@ class CyberSecurityOWASPState(State):
|
|
| 259 |
|
| 260 |
```text
|
| 261 |
1. reset()
|
| 262 |
-
-
|
| 263 |
-
-
|
| 264 |
-
-
|
|
|
|
| 265 |
- return initial observation
|
| 266 |
|
| 267 |
2. agent loop
|
|
@@ -275,52 +279,58 @@ class CyberSecurityOWASPState(State):
|
|
| 275 |
- freeze patch
|
| 276 |
- run public tests
|
| 277 |
- run hidden authorization invariants
|
| 278 |
-
- run
|
|
|
|
| 279 |
- compute deterministic reward
|
| 280 |
- return final observation, reward, done=True
|
| 281 |
|
| 282 |
4. logging
|
| 283 |
-
-
|
|
|
|
| 284 |
- send metrics to Trackio during training/eval
|
| 285 |
```
|
| 286 |
|
| 287 |
## 5. Reward design
|
| 288 |
|
| 289 |
-
The reward should be deterministic, decomposed, and resistant to reward hacking.
|
| 290 |
|
| 291 |
-
|
| 292 |
|
| 293 |
-
```
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
```
|
| 302 |
|
| 303 |
### Reward components
|
| 304 |
|
| 305 |
-
| Component |
|
| 306 |
-
|---|---
|
| 307 |
-
|
|
| 308 |
-
|
|
| 309 |
-
|
|
| 310 |
-
|
|
| 311 |
-
|
|
| 312 |
-
|
|
|
|
|
|
|
|
| 313 |
|
| 314 |
### Penalties
|
| 315 |
|
| 316 |
| Penalty | Trigger |
|
| 317 |
|---|---|
|
| 318 |
-
|
|
| 319 |
-
|
|
| 320 |
-
|
|
| 321 |
-
|
|
| 322 |
-
| `-0.10` | Patch exceeds file or diff-size budget. |
|
| 323 |
-
| `-1.00` | Attempts external network access, credential extraction, persistence, or unsafe behavior. |
|
| 324 |
|
| 325 |
The LLM judge, if used at all, should only annotate trace quality for analysis. It must not decide security-critical reward.
|
| 326 |
|
|
@@ -488,3 +498,4 @@ Expected endpoints:
|
|
| 488 |
| OpenEnv deployment docs | Informs HF Spaces deployment, endpoints, Docker workflow, and installable client package. | 8.5/10 |
|
| 489 |
| Hackathon judging criteria | Informs demo priorities: innovation, storytelling, reward improvement, and training pipeline. | 9/10 |
|
| 490 |
| TRL/OpenEnv training example | Informs rollout function, decomposed reward functions, and Trackio logging pattern. | 8/10 |
|
|
|
|
|
|
| 22 |
|
| 23 |
```mermaid
|
| 24 |
flowchart TB
|
| 25 |
+
subgraph A[Scenario + Curriculum Factory]
|
| 26 |
+
A1[Policy Graph Generator\nroles, users, tenants, ownership]
|
| 27 |
+
A2[Curriculum Controller\nmastery, weak spots, difficulty tier]
|
| 28 |
+
A3[Bounded Adversarial Designer\nsafe local scenario targets]
|
| 29 |
+
A4[Template Renderer\nFastAPI routes, services, auth helpers]
|
| 30 |
+
A5[A01 Bug Mutator\nIDOR, tenant, role, public-route traps]
|
| 31 |
+
A6[ScenarioSpec + Oracle\nvisible hints + hidden policy tuples]
|
| 32 |
+
A1 --> A3
|
| 33 |
+
A2 --> A3
|
| 34 |
+
A3 --> A4 --> A5 --> A6
|
|
|
|
|
|
|
|
|
|
| 35 |
end
|
| 36 |
|
|
|
|
|
|
|
|
|
|
| 37 |
subgraph B[CyberSecurity_OWASP OpenEnv Server]
|
| 38 |
+
B1[reset\(seed, difficulty\)\nselect curriculum profile]
|
| 39 |
+
B2[Episode State Store\nphase, history, metrics, weakness, patch diff]
|
| 40 |
+
B3[Typed Action Tools\ninspect, request, patch, visible tests]
|
| 41 |
+
B4[Ephemeral App Sandbox\ncode workspace + fixtures + local API model]
|
| 42 |
+
B5[Multi-layer Verifier\nvisible, hidden, oracle, regression]
|
| 43 |
+
B6[Deterministic Reward Engine\nstable components + penalties]
|
| 44 |
+
B7[Episode Artifact Logger\nJSONL transcript + verifier + diff]
|
| 45 |
+
B8[state\(\)\nstructured metadata for debugging/eval]
|
| 46 |
+
B1 --> B2 --> B3
|
| 47 |
+
B3 <--> B4
|
| 48 |
+
B4 --> B5 --> B6 --> B2
|
| 49 |
+
B2 --> B7 --> A2
|
| 50 |
+
B2 --> B8
|
| 51 |
end
|
| 52 |
|
|
|
|
|
|
|
|
|
|
| 53 |
subgraph C[Single LLM Agent]
|
| 54 |
C1[Observation Parser]
|
| 55 |
+
C2[AuthZ + Code Reasoning]
|
| 56 |
+
C3[Discover → Diagnose → Patch → Test\none JSON action]
|
| 57 |
C1 --> C2 --> C3
|
| 58 |
end
|
| 59 |
|
| 60 |
subgraph D[Training + Evaluation]
|
| 61 |
+
D1[Parallel Rollout Loop\nreset → step* → terminal reward]
|
| 62 |
+
D2[TRL GRPO + LoRA]
|
| 63 |
+
D3[Trackio Metrics\nreward curves, pass rates, failure modes]
|
| 64 |
+
D4[Held-out Family Eval\nbase vs trained model]
|
| 65 |
+
D5[Demo Artifacts\nbefore/after traces + JSONL]
|
| 66 |
+
D1 --> D2 --> D3 --> D4 --> D5
|
|
|
|
| 67 |
end
|
| 68 |
|
| 69 |
+
A6 --> B1
|
| 70 |
C3 -->|typed action| B3
|
| 71 |
B3 -->|observation + reward + done| C1
|
| 72 |
+
B6 --> D1
|
| 73 |
D2 --> C1
|
| 74 |
+
B6 --> D4
|
| 75 |
```
|
| 76 |
|
| 77 |
## 3. Component responsibilities
|
|
|
|
| 105 |
- visible test coverage;
|
| 106 |
- hidden invariant seeds.
|
| 107 |
|
| 108 |
+
The runtime now treats curriculum and adversarial targeting as first-class scenario inputs:
|
| 109 |
+
|
| 110 |
+
- `CurriculumController` tracks target weakness mastery, recent reward trend, failure counts, and difficulty tier.
|
| 111 |
+
- `BoundedAdversarialDesigner` chooses safe synthetic lab targets such as same-role cross-object access, cross-tenant boundaries, public-route overlocking, alternate-service reachability, and visible-test-only traps.
|
| 112 |
+
- `ScenarioFactory` combines the policy graph, curriculum profile, adversarial target, renderer, and hidden oracle metadata into one deterministic scenario spec.
|
| 113 |
+
- Hidden-eval episodes hold out scenario families, not only seeds, by marking evaluation-only scenario-family metadata in state rather than observations.
|
| 114 |
+
|
| 115 |
### 3.2 Policy Graph Generator
|
| 116 |
|
| 117 |
The policy graph is the ground truth for intended behavior.
|
|
|
|
| 221 |
```python
|
| 222 |
@dataclass
|
| 223 |
class CyberSecurityOWASPObservation(Observation):
|
| 224 |
+
phase: Literal["discover", "patch", "done"]
|
| 225 |
message: str
|
| 226 |
+
task_brief: str
|
| 227 |
+
visible_policy_hint: dict
|
| 228 |
+
workspace_summary: dict
|
| 229 |
+
available_actions: list[str]
|
| 230 |
+
last_tool_result: str
|
| 231 |
+
visible_test_result: str | None = None
|
| 232 |
+
reward_breakdown: dict[str, float] = field(default_factory=dict)
|
| 233 |
done_reason: str | None = None
|
| 234 |
```
|
| 235 |
|
| 236 |
+
The policy hint is deliberately partial. It may include product rules, fixture aliases, route summaries, and public-route intent, but it must not expose the hidden oracle matrix, hidden test bodies, injected bug labels, or held-out family labels.
|
| 237 |
|
| 238 |
### 3.7 State schema
|
| 239 |
|
|
|
|
| 243 |
@dataclass
|
| 244 |
class CyberSecurityOWASPState(State):
|
| 245 |
episode_id: str
|
| 246 |
+
task_id: str
|
| 247 |
+
split: Literal["train", "validation", "hidden_eval"]
|
| 248 |
step_count: int = 0
|
| 249 |
+
max_steps: int = 40
|
| 250 |
+
difficulty_tier: str = "warmup"
|
| 251 |
scenario_family: str = ""
|
| 252 |
+
template_id: str = "fastapi_basic"
|
| 253 |
+
target_weakness: str = ""
|
| 254 |
+
curriculum_snapshot: dict = field(default_factory=dict)
|
| 255 |
+
verification_summary: dict = field(default_factory=dict)
|
| 256 |
+
patch_diff: str = ""
|
| 257 |
+
episode_artifact_path: str | None = None
|
| 258 |
accumulated_reward: float = 0.0
|
| 259 |
```
|
| 260 |
|
|
|
|
| 262 |
|
| 263 |
```text
|
| 264 |
1. reset()
|
| 265 |
+
- curriculum selects difficulty tier and target weakness
|
| 266 |
+
- bounded adversarial designer chooses a safe local scenario target
|
| 267 |
+
- scenario factory compiles app from policy graph + template + injected bug
|
| 268 |
+
- initialize ephemeral app sandbox and fixture state
|
| 269 |
- return initial observation
|
| 270 |
|
| 271 |
2. agent loop
|
|
|
|
| 279 |
- freeze patch
|
| 280 |
- run public tests
|
| 281 |
- run hidden authorization invariants
|
| 282 |
+
- run policy-oracle matrix
|
| 283 |
+
- run regression and public-route preservation tests
|
| 284 |
- compute deterministic reward
|
| 285 |
- return final observation, reward, done=True
|
| 286 |
|
| 287 |
4. logging
|
| 288 |
+
- append JSONL artifact with scenario metadata, action trace, observations, patch diff, verifier result, and reward components
|
| 289 |
+
- feed terminal success/failure back into curriculum mastery tracking
|
| 290 |
- send metrics to Trackio during training/eval
|
| 291 |
```
|
| 292 |
|
| 293 |
## 5. Reward design
|
| 294 |
|
| 295 |
+
The reward should be deterministic, decomposed, and resistant to reward hacking. The maximum terminal reward remains **15.0** and high reward requires deterministic verifier success, not explanation quality.
|
| 296 |
|
| 297 |
+
Stable reward keys:
|
| 298 |
|
| 299 |
+
```python
|
| 300 |
+
{
|
| 301 |
+
"discovery": 0.0,
|
| 302 |
+
"security": 0.0,
|
| 303 |
+
"regression": 0.0,
|
| 304 |
+
"public_routes": 0.0,
|
| 305 |
+
"patch_quality": 0.0,
|
| 306 |
+
"visible_tests": 0.0,
|
| 307 |
+
"safety": 0.0,
|
| 308 |
+
"anti_cheat": 0.0,
|
| 309 |
+
"total": 0.0,
|
| 310 |
+
}
|
| 311 |
```
|
| 312 |
|
| 313 |
### Reward components
|
| 314 |
|
| 315 |
+
| Component | Purpose |
|
| 316 |
+
|---|---|
|
| 317 |
+
| `discovery` | Valid local evidence and correct violated policy rule. |
|
| 318 |
+
| `security` | Hidden exploit blocking plus policy-oracle matrix pass. |
|
| 319 |
+
| `regression` | Legitimate owner/admin/support flows still work. |
|
| 320 |
+
| `public_routes` | Intentionally public routes remain public. |
|
| 321 |
+
| `patch_quality` | Localized policy-aligned patch and efficient phase order. |
|
| 322 |
+
| `visible_tests` | Visible tests pass and app still boots. |
|
| 323 |
+
| `safety` | Penalizes invalid action patterns, unsafe targets, timeouts, and deny-all behavior. |
|
| 324 |
+
| `anti_cheat` | Penalizes hidden-file probing, hardcoded fixture IDs, and test/oracle tampering. |
|
| 325 |
|
| 326 |
### Penalties
|
| 327 |
|
| 328 |
| Penalty | Trigger |
|
| 329 |
|---|---|
|
| 330 |
+
| public route penalty | Breaks a route intentionally marked public. |
|
| 331 |
+
| anti-cheat penalty | Deletes or probes tests, hidden files, reward code, oracle data, or host paths. |
|
| 332 |
+
| hardcoding penalty | Hardcodes seed-specific IDs, users, tenants, or hidden assumptions. |
|
| 333 |
+
| safety penalty | Over-broad denial, malformed/invalid actions, repeated failed actions, or external target attempts. |
|
|
|
|
|
|
|
| 334 |
|
| 335 |
The LLM judge, if used at all, should only annotate trace quality for analysis. It must not decide security-critical reward.
|
| 336 |
|
|
|
|
| 498 |
| OpenEnv deployment docs | Informs HF Spaces deployment, endpoints, Docker workflow, and installable client package. | 8.5/10 |
|
| 499 |
| Hackathon judging criteria | Informs demo priorities: innovation, storytelling, reward improvement, and training pipeline. | 9/10 |
|
| 500 |
| TRL/OpenEnv training example | Informs rollout function, decomposed reward functions, and Trackio logging pattern. | 8/10 |
|
| 501 |
+
| Kube SRE Gym README | Informs the closed-loop pattern: adversarial scenario design, curriculum mastery tracking, real tool interaction, verification, and artifact-driven storytelling. | 8/10 |
|
README.md
CHANGED
|
@@ -21,7 +21,7 @@ tags:
|
|
| 21 |
inspect generated app + policy -> discover authorization bug -> submit finding -> patch code -> preserve intended behavior
|
| 22 |
```
|
| 23 |
|
| 24 |
-
The current implementation includes a functional MVP scenario: an invoices FastAPI-style app with one injected OWASP A01 BOLA/IDOR defect,
|
| 25 |
|
| 26 |
## Diagrams
|
| 27 |
|
|
@@ -98,27 +98,43 @@ Terminal reward uses stable components:
|
|
| 98 |
}
|
| 99 |
```
|
| 100 |
|
| 101 |
-
The verifier rewards blocking the hidden exploit while preserving legitimate owner/admin behavior and intentionally public routes. It penalizes deny-all fixes, hardcoded IDs, hidden file probes, external URL attempts, and test/fixture tampering.
|
| 102 |
|
| 103 |
## Scenario Generation
|
| 104 |
|
| 105 |
-
`reset(seed)`
|
| 106 |
|
| 107 |
- invoices domain policy graph;
|
|
|
|
| 108 |
- randomized users, tenants, invoices, and IDs;
|
| 109 |
- generated app files under `app/`;
|
| 110 |
- visible tests under `tests/test_visible.py`;
|
| 111 |
-
- hidden facts
|
| 112 |
|
| 113 |
Additional domains and bug families are scaffolded for extension.
|
| 114 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
## Testing
|
| 116 |
|
| 117 |
```bash
|
| 118 |
uv run --extra dev pytest
|
| 119 |
```
|
| 120 |
|
| 121 |
-
The suite covers model serialization, reset/step/state behavior, seed reproducibility, invalid actions, reward outcomes, anti-cheat checks,
|
| 122 |
|
| 123 |
## Training Scaffold
|
| 124 |
|
|
|
|
| 21 |
inspect generated app + policy -> discover authorization bug -> submit finding -> patch code -> preserve intended behavior
|
| 22 |
```
|
| 23 |
|
| 24 |
+
The current implementation includes a functional closed-loop MVP scenario: an invoices FastAPI-style app with one injected OWASP A01 BOLA/IDOR defect, curriculum-aware scenario selection, bounded adversarial targeting, an ephemeral app sandbox, multi-layer deterministic verifier checks, anti-cheat safeguards, JSONL episode artifacts, and decomposed reward.
|
| 25 |
|
| 26 |
## Diagrams
|
| 27 |
|
|
|
|
| 98 |
}
|
| 99 |
```
|
| 100 |
|
| 101 |
+
The verifier rewards blocking the hidden exploit while preserving legitimate owner/admin behavior and intentionally public routes. Terminal scoring requires visible checks, hidden authorization checks, a policy-oracle matrix, regression checks, public-route preservation, and patch-quality checks. It penalizes deny-all fixes, hardcoded IDs, repeated/invalid action patterns, hidden file probes, external URL attempts, and test/fixture tampering.
|
| 102 |
|
| 103 |
## Scenario Generation
|
| 104 |
|
| 105 |
+
`reset(seed)` asks the `CurriculumController` for a difficulty tier and target weakness, then `ScenarioFactory` uses a bounded adversarial designer to compile a fresh isolated workspace under a temp directory. The MVP compiler generates:
|
| 106 |
|
| 107 |
- invoices domain policy graph;
|
| 108 |
+
- bounded adversarial target metadata such as same-role cross-object access, cross-tenant access, public-route overlocking traps, alternate route/service reachability, or visible-test-only edge cases;
|
| 109 |
- randomized users, tenants, invoices, and IDs;
|
| 110 |
- generated app files under `app/`;
|
| 111 |
- visible tests under `tests/test_visible.py`;
|
| 112 |
+
- hidden facts, oracle tuples, scenario family metadata, and verifier targets kept out of observations.
|
| 113 |
|
| 114 |
Additional domains and bug families are scaffolded for extension.
|
| 115 |
|
| 116 |
+
## Runtime Components
|
| 117 |
+
|
| 118 |
+
The OpenEnv runtime is split into small server modules:
|
| 119 |
+
|
| 120 |
+
- `server/curriculum.py` tracks mastery, weak spots, reward trend, and difficulty tier.
|
| 121 |
+
- `server/adversarial_designer.py` chooses safe synthetic scenario targets from tracked weaknesses.
|
| 122 |
+
- `server/scenario_factory.py` compiles the generated app, visible hints, hidden facts, scenario family, and template metadata.
|
| 123 |
+
- `server/app_sandbox.py` handles editable workspace reads, patches, local requests, and OpenAPI summaries.
|
| 124 |
+
- `server/action_tools.py` dispatches typed tools through the sandbox.
|
| 125 |
+
- `server/authz_oracle.py` builds the hidden allowed/denied user-resource-action matrix.
|
| 126 |
+
- `server/verifier.py` aggregates visible tests, hidden tests, oracle matrix, regression/public-route checks, and patch quality.
|
| 127 |
+
- `server/episode_logger.py` appends JSONL rollouts under `outputs/rollouts/`.
|
| 128 |
+
|
| 129 |
+
The agent sees partial observations only: product rules, fixture aliases, route summaries, visible test results, and action errors. Hidden tests, oracle tuples, injected bug labels, and held-out scenario-family labels stay internal.
|
| 130 |
+
|
| 131 |
## Testing
|
| 132 |
|
| 133 |
```bash
|
| 134 |
uv run --extra dev pytest
|
| 135 |
```
|
| 136 |
|
| 137 |
+
The suite covers model serialization, reset/step/state behavior, seed reproducibility, invalid actions, reward outcomes, anti-cheat checks, scripted rollout policies, curriculum selection, adversarial targeting, held-out scenario families, oracle checks, verifier aggregation, and episode artifact logging.
|
| 138 |
|
| 139 |
## Training Scaffold
|
| 140 |
|
assets/architecture_diagram.mmd
CHANGED
|
@@ -1,46 +1,51 @@
|
|
| 1 |
-
flowchart
|
| 2 |
-
subgraph Factory["Scenario Factory"]
|
| 3 |
-
Policy["Policy graph\nusers, roles, tenants, ownership"]
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
|
|
|
|
|
|
|
|
|
| 10 |
Mutator --> Compiler
|
| 11 |
-
Fixtures --> Compiler
|
| 12 |
end
|
| 13 |
|
| 14 |
subgraph Runtime["CyberSecurity_OWASP OpenEnv Runtime"]
|
| 15 |
-
Reset["reset(seed)\
|
| 16 |
-
|
| 17 |
-
Tools["Typed
|
| 18 |
-
Sandbox["
|
| 19 |
-
Verifier["
|
| 20 |
-
Reward["
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
|
|
|
| 24 |
Tools <--> Sandbox
|
| 25 |
-
|
| 26 |
Verifier --> Reward
|
| 27 |
-
Reward -->
|
| 28 |
-
|
|
|
|
|
|
|
| 29 |
end
|
| 30 |
|
| 31 |
subgraph Agent["Single LLM Agent"]
|
| 32 |
Obs["Observation parser"]
|
| 33 |
-
Reason["
|
| 34 |
-
Act["
|
| 35 |
Obs --> Reason --> Act
|
| 36 |
end
|
| 37 |
|
| 38 |
-
subgraph Ops["Training
|
| 39 |
-
Rollout["
|
| 40 |
-
GRPO["TRL GRPO
|
| 41 |
-
Trackio["Trackio
|
| 42 |
-
Eval["Held-out
|
| 43 |
-
Artifacts["
|
| 44 |
Rollout --> GRPO --> Trackio --> Eval --> Artifacts
|
| 45 |
end
|
| 46 |
|
|
|
|
| 1 |
+
flowchart TB
|
| 2 |
+
subgraph Factory["Scenario + Curriculum Factory"]
|
| 3 |
+
Policy["Policy graph generator\nusers, roles, tenants, ownership"]
|
| 4 |
+
Curriculum["Curriculum controller\nmastery, weak spots, difficulty tier"]
|
| 5 |
+
Designer["Bounded adversarial designer\nsafe local scenario targets"]
|
| 6 |
+
Templates["Template renderer\nFastAPI routes, services, auth helpers"]
|
| 7 |
+
Mutator["A01 bug mutator\nIDOR, tenant, role, public-route traps"]
|
| 8 |
+
Compiler["ScenarioSpec + oracle\nvisible hints + hidden policy tuples"]
|
| 9 |
+
Policy --> Designer
|
| 10 |
+
Curriculum --> Designer
|
| 11 |
+
Designer --> Templates
|
| 12 |
+
Templates --> Mutator
|
| 13 |
Mutator --> Compiler
|
|
|
|
| 14 |
end
|
| 15 |
|
| 16 |
subgraph Runtime["CyberSecurity_OWASP OpenEnv Runtime"]
|
| 17 |
+
Reset["reset(seed, difficulty)\nselect curriculum profile"]
|
| 18 |
+
State["Episode state store\nphase, history, metrics, weakness, patch diff"]
|
| 19 |
+
Tools["Typed action tools\ninspect, request, patch, visible tests"]
|
| 20 |
+
Sandbox["Ephemeral app sandbox\ncode workspace + fixture DB + local API model"]
|
| 21 |
+
Verifier["Multi-layer verifier\nvisible, hidden, oracle, regression"]
|
| 22 |
+
Reward["Deterministic reward engine\ncomponents + penalties"]
|
| 23 |
+
Logger["Episode artifact logger\nJSONL transcript + verifier + diff"]
|
| 24 |
+
App["FastAPI / WebSocket server\n/ws, /reset, /step, /state, /web"]
|
| 25 |
+
Reset --> State
|
| 26 |
+
State --> Tools
|
| 27 |
Tools <--> Sandbox
|
| 28 |
+
Sandbox --> Verifier
|
| 29 |
Verifier --> Reward
|
| 30 |
+
Reward --> State
|
| 31 |
+
State --> Logger
|
| 32 |
+
Logger --> Curriculum
|
| 33 |
+
State --> App
|
| 34 |
end
|
| 35 |
|
| 36 |
subgraph Agent["Single LLM Agent"]
|
| 37 |
Obs["Observation parser"]
|
| 38 |
+
Reason["AuthZ + code reasoning"]
|
| 39 |
+
Act["Discover -> Diagnose -> Patch -> Test\none JSON action at a time"]
|
| 40 |
Obs --> Reason --> Act
|
| 41 |
end
|
| 42 |
|
| 43 |
+
subgraph Ops["Training / Evaluation / Demo"]
|
| 44 |
+
Rollout["Parallel rollout loop\nreset -> step* -> terminal reward"]
|
| 45 |
+
GRPO["TRL GRPO + LoRA"]
|
| 46 |
+
Trackio["Trackio reward curves\npass rates and failure modes"]
|
| 47 |
+
Eval["Held-out family eval\nbase vs trained model"]
|
| 48 |
+
Artifacts["Demo artifacts\nbefore/after traces + JSONL"]
|
| 49 |
Rollout --> GRPO --> Trackio --> Eval --> Artifacts
|
| 50 |
end
|
| 51 |
|
assets/architecture_diagram.svg
CHANGED
|
|
|
|
models.py
CHANGED
|
@@ -56,8 +56,12 @@ class CyberSecurityOWASPState(State):
|
|
| 56 |
seed: int = 0
|
| 57 |
split: CyberSecurityOWASPSplit = "train"
|
| 58 |
difficulty: int = 0
|
|
|
|
| 59 |
domain: str = ""
|
| 60 |
bug_family: str = ""
|
|
|
|
|
|
|
|
|
|
| 61 |
phase: CyberSecurityOWASPPhase = "discover"
|
| 62 |
max_steps: int = 40
|
| 63 |
done: bool = False
|
|
@@ -71,6 +75,11 @@ class CyberSecurityOWASPState(State):
|
|
| 71 |
reward_history: list[dict[str, float]] = Field(default_factory=list)
|
| 72 |
visible_facts: dict[str, Any] = Field(default_factory=dict)
|
| 73 |
hidden_facts: dict[str, Any] = Field(default_factory=dict)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
metrics: dict[str, Any] = Field(default_factory=dict)
|
| 75 |
anti_cheat_flags: list[str] = Field(default_factory=list)
|
| 76 |
|
|
|
|
| 56 |
seed: int = 0
|
| 57 |
split: CyberSecurityOWASPSplit = "train"
|
| 58 |
difficulty: int = 0
|
| 59 |
+
difficulty_tier: str = "warmup"
|
| 60 |
domain: str = ""
|
| 61 |
bug_family: str = ""
|
| 62 |
+
scenario_family: str = ""
|
| 63 |
+
template_id: str = "fastapi_basic"
|
| 64 |
+
target_weakness: str = "same_role_cross_object"
|
| 65 |
phase: CyberSecurityOWASPPhase = "discover"
|
| 66 |
max_steps: int = 40
|
| 67 |
done: bool = False
|
|
|
|
| 75 |
reward_history: list[dict[str, float]] = Field(default_factory=list)
|
| 76 |
visible_facts: dict[str, Any] = Field(default_factory=dict)
|
| 77 |
hidden_facts: dict[str, Any] = Field(default_factory=dict)
|
| 78 |
+
curriculum_snapshot: dict[str, Any] = Field(default_factory=dict)
|
| 79 |
+
verification_summary: dict[str, Any] = Field(default_factory=dict)
|
| 80 |
+
patch_diff: str = ""
|
| 81 |
+
episode_artifact_path: str | None = None
|
| 82 |
+
observation_history: list[dict[str, Any]] = Field(default_factory=list)
|
| 83 |
metrics: dict[str, Any] = Field(default_factory=dict)
|
| 84 |
anti_cheat_flags: list[str] = Field(default_factory=list)
|
| 85 |
|
pyproject.toml
CHANGED
|
@@ -47,3 +47,14 @@ server = "CyberSecurity_OWASP.server.app:main"
|
|
| 47 |
include-package-data = true
|
| 48 |
packages = ["CyberSecurity_OWASP", "CyberSecurity_OWASP.server"]
|
| 49 |
package-dir = { "CyberSecurity_OWASP" = ".", "CyberSecurity_OWASP.server" = "server" }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
include-package-data = true
|
| 48 |
packages = ["CyberSecurity_OWASP", "CyberSecurity_OWASP.server"]
|
| 49 |
package-dir = { "CyberSecurity_OWASP" = ".", "CyberSecurity_OWASP.server" = "server" }
|
| 50 |
+
|
| 51 |
+
[tool.pytest.ini_options]
|
| 52 |
+
testpaths = ["tests"]
|
| 53 |
+
norecursedirs = [
|
| 54 |
+
".git",
|
| 55 |
+
".venv",
|
| 56 |
+
"__pycache__",
|
| 57 |
+
"codex_tmp_tf",
|
| 58 |
+
"outputs",
|
| 59 |
+
"openenv_CyberSecurity_OWASP.egg-info",
|
| 60 |
+
]
|
rewards.py
CHANGED
|
@@ -41,21 +41,28 @@ def compute_reward(
|
|
| 41 |
reward["visible_tests"] = 1.0 if visible.get("passed") else 0.0
|
| 42 |
elif action.tool_name == "submit_fix":
|
| 43 |
security = verifier_result.get("security", {})
|
|
|
|
| 44 |
regression = verifier_result.get("regression", {})
|
| 45 |
public_routes = verifier_result.get("public_routes", {})
|
| 46 |
quality = verifier_result.get("patch_quality", {})
|
| 47 |
visible = verifier_result.get("visible", {})
|
| 48 |
-
|
|
|
|
| 49 |
reward["regression"] = 3.0 if regression.get("passed") else -5.0
|
| 50 |
reward["public_routes"] = 1.0 if public_routes.get("passed") else -3.0
|
| 51 |
reward["patch_quality"] = 2.0 if quality.get("passed") else 0.0
|
|
|
|
|
|
|
| 52 |
reward["visible_tests"] = 1.0 if visible.get("passed") else -6.0
|
| 53 |
if quality.get("hardcoded"):
|
| 54 |
reward["anti_cheat"] -= 4.0
|
| 55 |
if quality.get("deny_all"):
|
| 56 |
reward["safety"] -= 5.0
|
| 57 |
|
| 58 |
-
flags =
|
|
|
|
|
|
|
|
|
|
| 59 |
if flags:
|
| 60 |
if "external_network_attempt" in flags:
|
| 61 |
reward["safety"] -= 8.0
|
|
@@ -64,6 +71,21 @@ def compute_reward(
|
|
| 64 |
if "hardcoded_hidden_identifier" in flags:
|
| 65 |
reward["anti_cheat"] -= 4.0
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
total = sum(value for key, value in reward.items() if key != "total")
|
| 68 |
reward["total"] = min(15.0, total) if total > 0 else total
|
| 69 |
return reward
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
reward["visible_tests"] = 1.0 if visible.get("passed") else 0.0
|
| 42 |
elif action.tool_name == "submit_fix":
|
| 43 |
security = verifier_result.get("security", {})
|
| 44 |
+
oracle = verifier_result.get("oracle_matrix", {})
|
| 45 |
regression = verifier_result.get("regression", {})
|
| 46 |
public_routes = verifier_result.get("public_routes", {})
|
| 47 |
quality = verifier_result.get("patch_quality", {})
|
| 48 |
visible = verifier_result.get("visible", {})
|
| 49 |
+
security_passed = bool(security.get("passed") and oracle.get("passed", True))
|
| 50 |
+
reward["security"] = 5.0 if security_passed else -6.0
|
| 51 |
reward["regression"] = 3.0 if regression.get("passed") else -5.0
|
| 52 |
reward["public_routes"] = 1.0 if public_routes.get("passed") else -3.0
|
| 53 |
reward["patch_quality"] = 2.0 if quality.get("passed") else 0.0
|
| 54 |
+
if quality.get("passed") and _followed_repair_phase_order(state):
|
| 55 |
+
reward["patch_quality"] += 0.5
|
| 56 |
reward["visible_tests"] = 1.0 if visible.get("passed") else -6.0
|
| 57 |
if quality.get("hardcoded"):
|
| 58 |
reward["anti_cheat"] -= 4.0
|
| 59 |
if quality.get("deny_all"):
|
| 60 |
reward["safety"] -= 5.0
|
| 61 |
|
| 62 |
+
flags = sorted(
|
| 63 |
+
set(verifier_result.get("anti_cheat_flags", []) or [])
|
| 64 |
+
| set(state.anti_cheat_flags or [])
|
| 65 |
+
)
|
| 66 |
if flags:
|
| 67 |
if "external_network_attempt" in flags:
|
| 68 |
reward["safety"] -= 8.0
|
|
|
|
| 71 |
if "hardcoded_hidden_identifier" in flags:
|
| 72 |
reward["anti_cheat"] -= 4.0
|
| 73 |
|
| 74 |
+
if verifier_result.get("invalid_action"):
|
| 75 |
+
reward["safety"] -= 0.5
|
| 76 |
+
if verifier_result.get("repeated_action"):
|
| 77 |
+
reward["safety"] -= 0.2
|
| 78 |
+
|
| 79 |
total = sum(value for key, value in reward.items() if key != "total")
|
| 80 |
reward["total"] = min(15.0, total) if total > 0 else total
|
| 81 |
return reward
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
def _followed_repair_phase_order(state: CyberSecurityOWASPState) -> bool:
|
| 85 |
+
tools = [item.get("tool_name") for item in state.action_history]
|
| 86 |
+
required = ["submit_finding", "patch_file", "run_visible_tests", "submit_fix"]
|
| 87 |
+
cursor = 0
|
| 88 |
+
for tool in tools:
|
| 89 |
+
if cursor < len(required) and tool == required[cursor]:
|
| 90 |
+
cursor += 1
|
| 91 |
+
return cursor == len(required)
|
scenario_compiler.py
CHANGED
|
@@ -1,66 +1,26 @@
|
|
| 1 |
-
"""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
| 5 |
-
import os
|
| 6 |
-
import tempfile
|
| 7 |
-
from pathlib import Path
|
| 8 |
from typing import Any
|
| 9 |
-
from uuid import uuid4
|
| 10 |
|
| 11 |
try:
|
| 12 |
-
from .
|
| 13 |
-
from .policy_graph import build_invoice_policy
|
| 14 |
-
from .template_renderer import render_fastapi_basic
|
| 15 |
except ImportError: # pragma: no cover
|
| 16 |
-
from
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
def compile_scenario(seed: int, split: str = "train", difficulty: int = 0) -> dict[str, Any]:
|
| 35 |
-
"""Compile one isolated MVP authorization-repair scenario."""
|
| 36 |
-
|
| 37 |
-
compiled = build_invoice_policy(seed)
|
| 38 |
-
workspace = _make_workspace(prefix=f"cybersecurity_owasp_{split}_{seed}_")
|
| 39 |
-
editable_files = render_fastapi_basic(workspace, compiled.public_hint, compiled.hidden_facts)
|
| 40 |
-
task_id = f"{split}-invoices-bola-{seed}"
|
| 41 |
-
hidden = dict(compiled.hidden_facts)
|
| 42 |
-
hidden.update(
|
| 43 |
-
{
|
| 44 |
-
"workspace": str(workspace),
|
| 45 |
-
"editable_files": editable_files,
|
| 46 |
-
"initial_file_hashes": {
|
| 47 |
-
path: (workspace / path).read_text(encoding="utf-8")
|
| 48 |
-
for path in editable_files
|
| 49 |
-
},
|
| 50 |
-
}
|
| 51 |
)
|
| 52 |
-
return {
|
| 53 |
-
"task_id": task_id,
|
| 54 |
-
"workspace": workspace,
|
| 55 |
-
"domain": "invoices",
|
| 56 |
-
"bug_family": "bola_idor",
|
| 57 |
-
"difficulty": difficulty,
|
| 58 |
-
"task_brief": (
|
| 59 |
-
"Inspect the generated invoices app and policy. Find the broken "
|
| 60 |
-
"authorization behavior, submit a finding with local evidence, patch "
|
| 61 |
-
"the app, preserve intended owner/admin/public behavior, then submit."
|
| 62 |
-
),
|
| 63 |
-
"public_hint": compiled.public_hint,
|
| 64 |
-
"workspace_summary": visible_workspace_summary(editable_files, compiled.public_hint),
|
| 65 |
-
"hidden_facts": hidden,
|
| 66 |
-
}
|
|
|
|
| 1 |
+
"""Compatibility facade for deterministic scenario compilation."""
|
| 2 |
|
| 3 |
from __future__ import annotations
|
| 4 |
|
|
|
|
|
|
|
|
|
|
| 5 |
from typing import Any
|
|
|
|
| 6 |
|
| 7 |
try:
|
| 8 |
+
from .server.scenario_factory import ScenarioFactory
|
|
|
|
|
|
|
| 9 |
except ImportError: # pragma: no cover
|
| 10 |
+
from server.scenario_factory import ScenarioFactory
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def compile_scenario(
    seed: int,
    split: str = "train",
    difficulty: int = 0,
    curriculum_profile: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Compile one isolated authorization-repair scenario.

    Backwards-compatible facade: every call is delegated to
    ``ScenarioFactory.compile_scenario`` so existing callers of this
    module keep working unchanged.

    Args:
        seed: Deterministic scenario seed.
        split: Dataset split label (e.g. ``"train"``).
        difficulty: Requested difficulty level.
        curriculum_profile: Optional curriculum-selection hints passed
            straight through to the factory.

    Returns:
        The compiled scenario dictionary produced by the factory.
    """
    factory = ScenarioFactory()
    return factory.compile_scenario(
        seed,
        split=split,
        difficulty=difficulty,
        curriculum_profile=curriculum_profile,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
scripts/modal_ephemeral_train.py
CHANGED
|
@@ -35,6 +35,8 @@ image = (
|
|
| 35 |
ignore=[
|
| 36 |
".git",
|
| 37 |
".venv",
|
|
|
|
|
|
|
| 38 |
"__pycache__",
|
| 39 |
".pytest_cache",
|
| 40 |
"outputs",
|
|
|
|
| 35 |
ignore=[
|
| 36 |
".git",
|
| 37 |
".venv",
|
| 38 |
+
".env",
|
| 39 |
+
".env.*",
|
| 40 |
"__pycache__",
|
| 41 |
".pytest_cache",
|
| 42 |
"outputs",
|
scripts/modal_train_grpo.py
CHANGED
|
@@ -135,6 +135,8 @@ def _training_image() -> modal.Image:
|
|
| 135 |
ignore=[
|
| 136 |
".git",
|
| 137 |
".venv",
|
|
|
|
|
|
|
| 138 |
"__pycache__",
|
| 139 |
".pytest_cache",
|
| 140 |
"outputs",
|
|
|
|
| 135 |
ignore=[
|
| 136 |
".git",
|
| 137 |
".venv",
|
| 138 |
+
".env",
|
| 139 |
+
".env.*",
|
| 140 |
"__pycache__",
|
| 141 |
".pytest_cache",
|
| 142 |
"outputs",
|
scripts/track_pytest.py
CHANGED
|
@@ -10,6 +10,7 @@ from pathlib import Path
|
|
| 10 |
|
| 11 |
PROJECT_ROOT = Path(__file__).resolve().parents[1]
|
| 12 |
sys.path.insert(0, str(PROJECT_ROOT))
|
|
|
|
| 13 |
|
| 14 |
from training.trackio_utils import build_run_name, get_git_sha, log_trackio_metrics, trackio_run
|
| 15 |
|
|
|
|
| 10 |
|
| 11 |
PROJECT_ROOT = Path(__file__).resolve().parents[1]
|
| 12 |
sys.path.insert(0, str(PROJECT_ROOT))
|
| 13 |
+
sys.path.insert(0, str(PROJECT_ROOT.parent))
|
| 14 |
|
| 15 |
from training.trackio_utils import build_run_name, get_git_sha, log_trackio_metrics, trackio_run
|
| 16 |
|
server/CyberSecurity_OWASP_environment.py
CHANGED
|
@@ -4,7 +4,6 @@ from __future__ import annotations
|
|
| 4 |
|
| 5 |
import json
|
| 6 |
import shutil
|
| 7 |
-
from pathlib import Path
|
| 8 |
from typing import Any
|
| 9 |
from uuid import uuid4
|
| 10 |
|
|
@@ -16,16 +15,20 @@ try:
|
|
| 16 |
CyberSecurityOWASPObservation,
|
| 17 |
CyberSecurityOWASPState,
|
| 18 |
)
|
| 19 |
-
from ..
|
| 20 |
-
from .
|
| 21 |
-
from .
|
|
|
|
| 22 |
from .reward_engine import evaluate_action
|
|
|
|
| 23 |
except ImportError: # pragma: no cover
|
| 24 |
from models import CyberSecurityOWASPAction, CyberSecurityOWASPObservation, CyberSecurityOWASPState
|
| 25 |
-
from
|
| 26 |
-
from
|
| 27 |
-
from
|
|
|
|
| 28 |
from server.reward_engine import evaluate_action
|
|
|
|
| 29 |
|
| 30 |
|
| 31 |
ALLOWED_TOOLS = {
|
|
@@ -67,6 +70,9 @@ class CybersecurityOwaspEnvironment(
|
|
| 67 |
self._visible_policy_hint: dict[str, Any] = {}
|
| 68 |
self._workspace_summary: dict[str, Any] = {}
|
| 69 |
self._last_done_observation: CyberSecurityOWASPObservation | None = None
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
def reset(
|
| 72 |
self,
|
|
@@ -78,15 +84,29 @@ class CybersecurityOwaspEnvironment(
|
|
| 78 |
) -> CyberSecurityOWASPObservation:
|
| 79 |
self.close()
|
| 80 |
actual_seed = int(seed if seed is not None else 0)
|
| 81 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
self._state = CyberSecurityOWASPState(
|
| 83 |
episode_id=episode_id or str(uuid4()),
|
| 84 |
task_id=scenario["task_id"],
|
| 85 |
seed=actual_seed,
|
| 86 |
split=split,
|
| 87 |
-
difficulty=difficulty,
|
|
|
|
| 88 |
domain=scenario["domain"],
|
| 89 |
bug_family=scenario["bug_family"],
|
|
|
|
|
|
|
|
|
|
| 90 |
phase="discover",
|
| 91 |
step_count=0,
|
| 92 |
max_steps=40,
|
|
@@ -94,6 +114,7 @@ class CybersecurityOwaspEnvironment(
|
|
| 94 |
success=False,
|
| 95 |
visible_facts={"workspace_summary": scenario["workspace_summary"]},
|
| 96 |
hidden_facts=scenario["hidden_facts"],
|
|
|
|
| 97 |
metrics={"reset_count": 1},
|
| 98 |
)
|
| 99 |
self._task_brief = scenario["task_brief"]
|
|
@@ -124,7 +145,12 @@ class CybersecurityOwaspEnvironment(
|
|
| 124 |
)
|
| 125 |
|
| 126 |
if action.tool_name not in ALLOWED_TOOLS[self._state.phase]:
|
| 127 |
-
verifier, reward = evaluate_action(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
return self._finish_step(
|
| 129 |
"Action is not allowed in the current phase.",
|
| 130 |
reward,
|
|
@@ -143,7 +169,12 @@ class CybersecurityOwaspEnvironment(
|
|
| 143 |
visible_test_result=visible_tests,
|
| 144 |
)
|
| 145 |
except Exception as exc: # keep malformed agent actions from crashing the server
|
| 146 |
-
verifier, reward = evaluate_action(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
return self._finish_step(
|
| 148 |
"Tool execution failed.",
|
| 149 |
reward,
|
|
@@ -164,91 +195,48 @@ class CybersecurityOwaspEnvironment(
|
|
| 164 |
def _execute(
|
| 165 |
self, action: CyberSecurityOWASPAction, anti_cheat_flags: list[str]
|
| 166 |
) -> tuple[str, dict, dict[str, float], str | None]:
|
| 167 |
-
verifier
|
| 168 |
-
reward = {key: 0.0 for key in (
|
| 169 |
-
"discovery",
|
| 170 |
-
"security",
|
| 171 |
-
"regression",
|
| 172 |
-
"public_routes",
|
| 173 |
-
"patch_quality",
|
| 174 |
-
"visible_tests",
|
| 175 |
-
"safety",
|
| 176 |
-
"anti_cheat",
|
| 177 |
-
"total",
|
| 178 |
-
)}
|
| 179 |
-
visible_tests = None
|
| 180 |
-
args = action.arguments or {}
|
| 181 |
|
| 182 |
-
if action.tool_name
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
"/health": {"get": {"x-public": True}},
|
| 195 |
-
"/invoices/{invoice_id}": {"get": {"x-public": False}},
|
| 196 |
-
},
|
| 197 |
-
},
|
| 198 |
-
indent=2,
|
| 199 |
-
), verifier, reward, None
|
| 200 |
-
if action.tool_name == "read_file":
|
| 201 |
-
path = self._resolve_path(str(args.get("path", "")))
|
| 202 |
-
return path.read_text(encoding="utf-8"), verifier, reward, None
|
| 203 |
-
if action.tool_name == "search_code":
|
| 204 |
-
return self._search_code(str(args.get("query", ""))), verifier, reward, None
|
| 205 |
-
if action.tool_name == "send_local_request":
|
| 206 |
-
if not is_local_route(str(args.get("path", ""))):
|
| 207 |
-
raise ValueError("send_local_request only accepts local route paths")
|
| 208 |
-
response = simulate_request(
|
| 209 |
self._state,
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
return json.dumps(response, indent=2, sort_keys=True), verifier, reward, None
|
| 215 |
-
if action.tool_name == "compare_identities":
|
| 216 |
-
path = str(args.get("path", ""))
|
| 217 |
-
first = str(args.get("first_user_id", ""))
|
| 218 |
-
second = str(args.get("second_user_id", ""))
|
| 219 |
-
if not is_local_route(path):
|
| 220 |
-
raise ValueError("compare_identities only accepts local route paths")
|
| 221 |
-
response = {
|
| 222 |
-
"first": simulate_request(self._state, str(args.get("method", "GET")), path, first),
|
| 223 |
-
"second": simulate_request(self._state, str(args.get("method", "GET")), path, second),
|
| 224 |
-
}
|
| 225 |
-
return json.dumps(response, indent=2, sort_keys=True), verifier, reward, None
|
| 226 |
if action.tool_name == "submit_finding":
|
| 227 |
verifier, reward = evaluate_action(self._state, action, anti_cheat_flags)
|
|
|
|
| 228 |
if verifier.get("finding", {}).get("valid"):
|
| 229 |
self._state.finding_submitted = True
|
| 230 |
self._state.phase = "patch"
|
| 231 |
return "Finding accepted. Patch phase unlocked.", verifier, reward, None
|
| 232 |
return "Finding was not specific enough to unlock patching.", verifier, reward, None
|
| 233 |
-
if action.tool_name == "patch_file":
|
| 234 |
-
path = self._resolve_path(str(args.get("path", "")), write=True)
|
| 235 |
-
if "content" in args:
|
| 236 |
-
path.write_text(str(args["content"]), encoding="utf-8")
|
| 237 |
-
else:
|
| 238 |
-
self._apply_unified_diff(path, str(args.get("diff", "")))
|
| 239 |
-
return f"Patched {args.get('path')}.", verifier, reward, None
|
| 240 |
if action.tool_name == "run_visible_tests":
|
| 241 |
verifier, reward = evaluate_action(self._state, action, anti_cheat_flags)
|
|
|
|
| 242 |
visible_tests = json.dumps(verifier.get("visible", {}), indent=2, sort_keys=True)
|
| 243 |
return visible_tests, verifier, reward, visible_tests
|
| 244 |
if action.tool_name == "submit_fix":
|
| 245 |
verifier, reward = evaluate_action(self._state, action, anti_cheat_flags)
|
|
|
|
| 246 |
self._state.patch_submitted = True
|
| 247 |
security = verifier.get("security", {}).get("passed", False)
|
|
|
|
| 248 |
regression = verifier.get("regression", {}).get("passed", False)
|
| 249 |
public = verifier.get("public_routes", {}).get("passed", False)
|
| 250 |
quality = verifier.get("patch_quality", {}).get("passed", False)
|
| 251 |
-
self._state.success = bool(security and regression and public and quality)
|
| 252 |
self._state.done = True
|
| 253 |
self._state.phase = "done"
|
| 254 |
self._state.failure_reason = None if self._state.success else "hidden_verifier_failed"
|
|
@@ -281,7 +269,10 @@ class CybersecurityOwaspEnvironment(
|
|
| 281 |
visible_test_result=visible_test_result,
|
| 282 |
done_reason=self._state.failure_reason,
|
| 283 |
)
|
|
|
|
|
|
|
| 284 |
if self._state.done:
|
|
|
|
| 285 |
self._last_done_observation = obs
|
| 286 |
return obs
|
| 287 |
|
|
@@ -314,53 +305,15 @@ class CybersecurityOwaspEnvironment(
|
|
| 314 |
metadata={"episode_id": self._state.episode_id, "step_count": self._state.step_count},
|
| 315 |
)
|
| 316 |
|
| 317 |
-
def
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
-
path = workspace / rel
|
| 330 |
-
text = path.read_text(encoding="utf-8")
|
| 331 |
-
for idx, line in enumerate(text.splitlines(), start=1):
|
| 332 |
-
if query.lower() in line.lower():
|
| 333 |
-
results.append(f"{rel}:{idx}: {line}")
|
| 334 |
-
return "\n".join(results) or "No matches."
|
| 335 |
-
|
| 336 |
-
def _apply_unified_diff(self, path: Path, diff: str) -> None:
|
| 337 |
-
if not diff.strip():
|
| 338 |
-
raise ValueError("diff or content is required")
|
| 339 |
-
original = path.read_text(encoding="utf-8").splitlines(True)
|
| 340 |
-
output: list[str] = []
|
| 341 |
-
old_index = 0
|
| 342 |
-
lines = diff.splitlines(True)
|
| 343 |
-
i = 0
|
| 344 |
-
while i < len(lines):
|
| 345 |
-
line = lines[i]
|
| 346 |
-
if not line.startswith("@@"):
|
| 347 |
-
i += 1
|
| 348 |
-
continue
|
| 349 |
-
old_start = int(line.split()[1].split(",")[0][1:])
|
| 350 |
-
output.extend(original[old_index : old_start - 1])
|
| 351 |
-
old_index = old_start - 1
|
| 352 |
-
i += 1
|
| 353 |
-
while i < len(lines) and not lines[i].startswith("@@"):
|
| 354 |
-
hunk_line = lines[i]
|
| 355 |
-
if hunk_line.startswith(" "):
|
| 356 |
-
output.append(original[old_index])
|
| 357 |
-
old_index += 1
|
| 358 |
-
elif hunk_line.startswith("-"):
|
| 359 |
-
old_index += 1
|
| 360 |
-
elif hunk_line.startswith("+"):
|
| 361 |
-
output.append(hunk_line[1:])
|
| 362 |
-
elif hunk_line.startswith("\\"):
|
| 363 |
-
pass
|
| 364 |
-
i += 1
|
| 365 |
-
output.extend(original[old_index:])
|
| 366 |
-
path.write_text("".join(output), encoding="utf-8")
|
|
|
|
| 4 |
|
| 5 |
import json
|
| 6 |
import shutil
|
|
|
|
| 7 |
from typing import Any
|
| 8 |
from uuid import uuid4
|
| 9 |
|
|
|
|
| 15 |
CyberSecurityOWASPObservation,
|
| 16 |
CyberSecurityOWASPState,
|
| 17 |
)
|
| 18 |
+
from ..validators import detect_cheating
|
| 19 |
+
from .action_tools import ActionTools
|
| 20 |
+
from .curriculum import CurriculumController
|
| 21 |
+
from .episode_logger import EpisodeArtifactLogger
|
| 22 |
from .reward_engine import evaluate_action
|
| 23 |
+
from .scenario_factory import ScenarioFactory
|
| 24 |
except ImportError: # pragma: no cover
|
| 25 |
from models import CyberSecurityOWASPAction, CyberSecurityOWASPObservation, CyberSecurityOWASPState
|
| 26 |
+
from validators import detect_cheating
|
| 27 |
+
from server.action_tools import ActionTools
|
| 28 |
+
from server.curriculum import CurriculumController
|
| 29 |
+
from server.episode_logger import EpisodeArtifactLogger
|
| 30 |
from server.reward_engine import evaluate_action
|
| 31 |
+
from server.scenario_factory import ScenarioFactory
|
| 32 |
|
| 33 |
|
| 34 |
ALLOWED_TOOLS = {
|
|
|
|
| 70 |
self._visible_policy_hint: dict[str, Any] = {}
|
| 71 |
self._workspace_summary: dict[str, Any] = {}
|
| 72 |
self._last_done_observation: CyberSecurityOWASPObservation | None = None
|
| 73 |
+
self._curriculum = CurriculumController()
|
| 74 |
+
self._scenario_factory = ScenarioFactory()
|
| 75 |
+
self._episode_logger = EpisodeArtifactLogger()
|
| 76 |
|
| 77 |
def reset(
|
| 78 |
self,
|
|
|
|
| 84 |
) -> CyberSecurityOWASPObservation:
|
| 85 |
self.close()
|
| 86 |
actual_seed = int(seed if seed is not None else 0)
|
| 87 |
+
curriculum_profile = self._curriculum.select_profile(
|
| 88 |
+
seed=actual_seed,
|
| 89 |
+
split=split,
|
| 90 |
+
requested_difficulty=difficulty,
|
| 91 |
+
)
|
| 92 |
+
scenario = self._scenario_factory.compile_scenario(
|
| 93 |
+
actual_seed,
|
| 94 |
+
split=split,
|
| 95 |
+
difficulty=difficulty,
|
| 96 |
+
curriculum_profile=curriculum_profile,
|
| 97 |
+
)
|
| 98 |
self._state = CyberSecurityOWASPState(
|
| 99 |
episode_id=episode_id or str(uuid4()),
|
| 100 |
task_id=scenario["task_id"],
|
| 101 |
seed=actual_seed,
|
| 102 |
split=split,
|
| 103 |
+
difficulty=scenario["difficulty"],
|
| 104 |
+
difficulty_tier=scenario["difficulty_tier"],
|
| 105 |
domain=scenario["domain"],
|
| 106 |
bug_family=scenario["bug_family"],
|
| 107 |
+
scenario_family=scenario["scenario_family"],
|
| 108 |
+
template_id=scenario["template_id"],
|
| 109 |
+
target_weakness=scenario["target_weakness"],
|
| 110 |
phase="discover",
|
| 111 |
step_count=0,
|
| 112 |
max_steps=40,
|
|
|
|
| 114 |
success=False,
|
| 115 |
visible_facts={"workspace_summary": scenario["workspace_summary"]},
|
| 116 |
hidden_facts=scenario["hidden_facts"],
|
| 117 |
+
curriculum_snapshot=scenario["curriculum_snapshot"],
|
| 118 |
metrics={"reset_count": 1},
|
| 119 |
)
|
| 120 |
self._task_brief = scenario["task_brief"]
|
|
|
|
| 145 |
)
|
| 146 |
|
| 147 |
if action.tool_name not in ALLOWED_TOOLS[self._state.phase]:
|
| 148 |
+
verifier, reward = evaluate_action(
|
| 149 |
+
self._state,
|
| 150 |
+
action,
|
| 151 |
+
anti_cheat_flags,
|
| 152 |
+
invalid_action=True,
|
| 153 |
+
)
|
| 154 |
return self._finish_step(
|
| 155 |
"Action is not allowed in the current phase.",
|
| 156 |
reward,
|
|
|
|
| 169 |
visible_test_result=visible_tests,
|
| 170 |
)
|
| 171 |
except Exception as exc: # keep malformed agent actions from crashing the server
|
| 172 |
+
verifier, reward = evaluate_action(
|
| 173 |
+
self._state,
|
| 174 |
+
action,
|
| 175 |
+
anti_cheat_flags,
|
| 176 |
+
invalid_action=True,
|
| 177 |
+
)
|
| 178 |
return self._finish_step(
|
| 179 |
"Tool execution failed.",
|
| 180 |
reward,
|
|
|
|
| 195 |
def _execute(
|
| 196 |
self, action: CyberSecurityOWASPAction, anti_cheat_flags: list[str]
|
| 197 |
) -> tuple[str, dict, dict[str, float], str | None]:
|
| 198 |
+
verifier, reward = evaluate_action(self._state, action, anti_cheat_flags)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 199 |
|
| 200 |
+
if action.tool_name in {
|
| 201 |
+
"noop",
|
| 202 |
+
"inspect_policy_graph",
|
| 203 |
+
"list_routes",
|
| 204 |
+
"read_openapi",
|
| 205 |
+
"read_file",
|
| 206 |
+
"search_code",
|
| 207 |
+
"send_local_request",
|
| 208 |
+
"compare_identities",
|
| 209 |
+
"patch_file",
|
| 210 |
+
}:
|
| 211 |
+
result = ActionTools(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
self._state,
|
| 213 |
+
self._visible_policy_hint,
|
| 214 |
+
self._workspace_summary,
|
| 215 |
+
).execute(action)
|
| 216 |
+
return result.message, verifier, reward, result.visible_test_result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 217 |
if action.tool_name == "submit_finding":
|
| 218 |
verifier, reward = evaluate_action(self._state, action, anti_cheat_flags)
|
| 219 |
+
self._state.verification_summary = verifier
|
| 220 |
if verifier.get("finding", {}).get("valid"):
|
| 221 |
self._state.finding_submitted = True
|
| 222 |
self._state.phase = "patch"
|
| 223 |
return "Finding accepted. Patch phase unlocked.", verifier, reward, None
|
| 224 |
return "Finding was not specific enough to unlock patching.", verifier, reward, None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
if action.tool_name == "run_visible_tests":
|
| 226 |
verifier, reward = evaluate_action(self._state, action, anti_cheat_flags)
|
| 227 |
+
self._state.verification_summary = verifier
|
| 228 |
visible_tests = json.dumps(verifier.get("visible", {}), indent=2, sort_keys=True)
|
| 229 |
return visible_tests, verifier, reward, visible_tests
|
| 230 |
if action.tool_name == "submit_fix":
|
| 231 |
verifier, reward = evaluate_action(self._state, action, anti_cheat_flags)
|
| 232 |
+
self._state.verification_summary = verifier
|
| 233 |
self._state.patch_submitted = True
|
| 234 |
security = verifier.get("security", {}).get("passed", False)
|
| 235 |
+
oracle = verifier.get("oracle_matrix", {}).get("passed", False)
|
| 236 |
regression = verifier.get("regression", {}).get("passed", False)
|
| 237 |
public = verifier.get("public_routes", {}).get("passed", False)
|
| 238 |
quality = verifier.get("patch_quality", {}).get("passed", False)
|
| 239 |
+
self._state.success = bool(security and oracle and regression and public and quality)
|
| 240 |
self._state.done = True
|
| 241 |
self._state.phase = "done"
|
| 242 |
self._state.failure_reason = None if self._state.success else "hidden_verifier_failed"
|
|
|
|
| 269 |
visible_test_result=visible_test_result,
|
| 270 |
done_reason=self._state.failure_reason,
|
| 271 |
)
|
| 272 |
+
observation_record = obs.model_dump()
|
| 273 |
+
self._state.observation_history.append(observation_record)
|
| 274 |
if self._state.done:
|
| 275 |
+
self._finalize_terminal_episode(observation_record)
|
| 276 |
self._last_done_observation = obs
|
| 277 |
return obs
|
| 278 |
|
|
|
|
| 305 |
metadata={"episode_id": self._state.episode_id, "step_count": self._state.step_count},
|
| 306 |
)
|
| 307 |
|
| 308 |
+
def _finalize_terminal_episode(self, observation_record: dict[str, Any]) -> None:
    """Record curriculum mastery and persist the episode artifact exactly once.

    Idempotent: if an artifact path is already set for this episode the
    call is a no-op, so repeated terminal steps cannot double-log.
    """
    if self._state.episode_artifact_path:
        return  # already finalized for this episode
    mastery = self._curriculum.record_episode(self._state)
    # Copy-then-update so the stored snapshot is a fresh dict, matching
    # the original {**snapshot, ...} merge semantics.
    snapshot = dict(self._state.curriculum_snapshot)
    snapshot["post_episode_mastery"] = mastery
    self._state.curriculum_snapshot = snapshot
    self._episode_logger.log_episode(
        self._state,
        final_observation=observation_record,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
server/__init__.py
CHANGED
|
@@ -6,6 +6,16 @@
|
|
| 6 |
|
| 7 |
"""Cybersecurity Owasp environment server components."""
|
| 8 |
|
|
|
|
| 9 |
from .CyberSecurity_OWASP_environment import CybersecurityOwaspEnvironment
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
__all__ = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
"""Cybersecurity Owasp environment server components."""
|
| 8 |
|
| 9 |
+
from .adversarial_designer import BoundedAdversarialDesigner
|
| 10 |
from .CyberSecurity_OWASP_environment import CybersecurityOwaspEnvironment
|
| 11 |
+
from .curriculum import CurriculumController
|
| 12 |
+
from .scenario_factory import ScenarioFactory
|
| 13 |
+
from .verifier import MultiLayerVerifier
|
| 14 |
|
| 15 |
+
__all__ = [
|
| 16 |
+
"BoundedAdversarialDesigner",
|
| 17 |
+
"CurriculumController",
|
| 18 |
+
"CybersecurityOwaspEnvironment",
|
| 19 |
+
"MultiLayerVerifier",
|
| 20 |
+
"ScenarioFactory",
|
| 21 |
+
]
|
server/action_tools.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Typed action tool dispatcher for the generated app sandbox."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import json
|
| 6 |
+
from dataclasses import dataclass
|
| 7 |
+
|
| 8 |
+
try:
|
| 9 |
+
from ..models import CyberSecurityOWASPAction, CyberSecurityOWASPState
|
| 10 |
+
from .app_sandbox import AppSandbox
|
| 11 |
+
except ImportError: # pragma: no cover
|
| 12 |
+
from models import CyberSecurityOWASPAction, CyberSecurityOWASPState
|
| 13 |
+
from server.app_sandbox import AppSandbox
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@dataclass(frozen=True)
class ToolResult:
    """Immutable outcome of a single tool invocation.

    Attributes mirror what the environment returns to the agent:
    a human-readable message, plus (only for test-running tools) the
    serialized visible-test output.
    """

    # Text shown to the agent as the tool's primary output.
    message: str
    # JSON text of visible-test results; None for tools that run no tests.
    visible_test_result: str | None = None
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class ActionTools:
    """Executes phase-gated, safe tools against one episode state."""

    def __init__(
        self,
        state: CyberSecurityOWASPState,
        visible_policy_hint: dict,
        workspace_summary: dict,
    ):
        self.state = state
        self.visible_policy_hint = visible_policy_hint
        self.workspace_summary = workspace_summary
        # One sandbox per dispatcher instance, bound to the episode state.
        self.sandbox = AppSandbox(state)

    def execute(self, action: CyberSecurityOWASPAction) -> ToolResult:
        """Run one tool call and wrap its output in a ToolResult.

        Raises:
            ValueError: if ``action.tool_name`` is not a known tool.
        """
        args = action.arguments or {}
        tool = action.tool_name
        if tool == "noop":
            return ToolResult("No operation.")
        if tool == "inspect_policy_graph":
            return self._json_result(self.visible_policy_hint)
        if tool == "list_routes":
            # Route listing keeps the summary's own key order (no sort_keys).
            return ToolResult(json.dumps(self.workspace_summary["routes"], indent=2))
        if tool == "read_openapi":
            return ToolResult(self.sandbox.read_openapi())
        if tool == "read_file":
            return ToolResult(self.sandbox.read_file(str(args.get("path", ""))))
        if tool == "search_code":
            return ToolResult(self.sandbox.search_code(str(args.get("query", ""))))
        if tool == "send_local_request":
            reply = self.sandbox.send_local_request(
                str(args.get("method", "GET")),
                str(args.get("path", "")),
                args.get("user_id"),
            )
            return self._json_result(reply)
        if tool == "compare_identities":
            reply = self.sandbox.compare_identities(
                str(args.get("method", "GET")),
                str(args.get("path", "")),
                str(args.get("first_user_id", "")),
                str(args.get("second_user_id", "")),
            )
            return self._json_result(reply)
        if tool == "patch_file":
            return self._patch(args)
        raise ValueError(f"Unhandled tool {action.tool_name}")

    def _json_result(self, payload) -> ToolResult:
        # Deterministic JSON rendering for agent-visible structured output.
        return ToolResult(json.dumps(payload, indent=2, sort_keys=True))

    def _patch(self, args: dict) -> ToolResult:
        # Explicit "content" wins; otherwise fall back to a unified diff.
        outcome = self.sandbox.patch_file(
            str(args.get("path", "")),
            content=str(args["content"]) if "content" in args else None,
            diff=str(args.get("diff", "")) if "content" not in args else None,
        )
        changed = "no diff" if not outcome["diff"].strip() else "diff recorded"
        return ToolResult(f"Patched {outcome['path']} ({changed}).")
|
server/adversarial_designer.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Bounded adversarial scenario targeting for synthetic local lab episodes."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from dataclasses import dataclass
|
| 6 |
+
from typing import Any
|
| 7 |
+
|
| 8 |
+
try:
|
| 9 |
+
from .curriculum import WEAKNESS_TARGETS
|
| 10 |
+
except ImportError: # pragma: no cover
|
| 11 |
+
from server.curriculum import WEAKNESS_TARGETS
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# Closed set of bounded adversarial targets. Each key names a tracked agent
# weakness; "hidden_focus" lists the hidden-verifier checks that exercise it
# and "description" is the human-readable scenario intent. The designer only
# ever selects from this mapping, keeping episodes in the safe local lab.
TARGET_SPECS: dict[str, dict[str, Any]] = {
    "same_role_cross_object": {
        "description": "Same-role actor tries to read another user's object.",
        "hidden_focus": ["same_tenant_other_user_blocked"],
    },
    "cross_tenant_boundary": {
        "description": "Tenant-local admin is denied access to another tenant's resource.",
        "hidden_focus": ["cross_tenant_blocked"],
    },
    "public_route_overlock": {
        "description": "Public health route must remain unauthenticated after patching.",
        "hidden_focus": ["health_public"],
    },
    "alternate_route_same_service": {
        "description": "Alternate route/service access should follow the same policy oracle.",
        "hidden_focus": ["oracle_matrix"],
    },
    "visible_test_edge_case": {
        "description": "Visible tests are insufficient; hidden policy matrix decides success.",
        "hidden_focus": ["visible_test_only_guard"],
    },
}
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
@dataclass(frozen=True)
class BoundedAdversarialDesigner:
    """Chooses safe local lab variants that target tracked agent weaknesses."""

    def design(self, *, seed: int, split: str, curriculum_profile: dict[str, Any]) -> dict[str, Any]:
        """Build one bounded scenario-design spec.

        Prefers the curriculum's requested weakness; when that is absent
        or unknown, falls back deterministically by rotating through
        WEAKNESS_TARGETS keyed on the seed.
        """
        requested = str(curriculum_profile.get("target_weakness") or "")
        if requested in TARGET_SPECS:
            target = requested
        else:
            target = WEAKNESS_TARGETS[int(seed) % len(WEAKNESS_TARGETS)]
        family = f"invoices.bola_idor.{target}"
        if split == "hidden_eval":
            # Held-out evaluation families are namespaced separately.
            family = f"heldout.{family}"
        spec = TARGET_SPECS[target]
        return {
            "domain": "invoices",
            "bug_family": "bola_idor",
            "template_id": "fastapi_basic",
            "scenario_family": family,
            "target_weakness": target,
            # Copy so callers cannot mutate the shared TARGET_SPECS entry.
            "hidden_focus": list(spec["hidden_focus"]),
            "description": spec["description"],
            "safe_lab_only": True,
        }
|
server/app_sandbox.py
ADDED
|
@@ -0,0 +1,139 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Ephemeral generated app sandbox operations."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import difflib
|
| 6 |
+
import json
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import Any
|
| 9 |
+
|
| 10 |
+
try:
|
| 11 |
+
from ..models import CyberSecurityOWASPState
|
| 12 |
+
from ..safety import is_local_route
|
| 13 |
+
from ..validators import is_path_allowed, simulate_request
|
| 14 |
+
except ImportError: # pragma: no cover
|
| 15 |
+
from models import CyberSecurityOWASPState
|
| 16 |
+
from safety import is_local_route
|
| 17 |
+
from validators import is_path_allowed, simulate_request
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class AppSandbox:
|
| 21 |
+
"""Encapsulates all generated workspace reads, patches, and local requests."""
|
| 22 |
+
|
| 23 |
+
def __init__(self, state: CyberSecurityOWASPState):
|
| 24 |
+
self.state = state
|
| 25 |
+
|
| 26 |
+
@property
|
| 27 |
+
def workspace(self) -> Path:
|
| 28 |
+
return Path(str(self.state.hidden_facts["workspace"]))
|
| 29 |
+
|
| 30 |
+
def read_file(self, path: str) -> str:
|
| 31 |
+
return self._resolve_path(path).read_text(encoding="utf-8")
|
| 32 |
+
|
| 33 |
+
def search_code(self, query: str) -> str:
|
| 34 |
+
if not query:
|
| 35 |
+
raise ValueError("query is required")
|
| 36 |
+
results: list[str] = []
|
| 37 |
+
for rel in self.state.hidden_facts.get("editable_files", []):
|
| 38 |
+
path = self.workspace / rel
|
| 39 |
+
text = path.read_text(encoding="utf-8")
|
| 40 |
+
for idx, line in enumerate(text.splitlines(), start=1):
|
| 41 |
+
if query.lower() in line.lower():
|
| 42 |
+
results.append(f"{rel}:{idx}: {line}")
|
| 43 |
+
return "\n".join(results) or "No matches."
|
| 44 |
+
|
| 45 |
+
def patch_file(self, path: str, *, content: str | None = None, diff: str | None = None) -> dict[str, str]:
|
| 46 |
+
target = self._resolve_path(path, write=True)
|
| 47 |
+
before = target.read_text(encoding="utf-8")
|
| 48 |
+
if content is not None:
|
| 49 |
+
target.write_text(content, encoding="utf-8")
|
| 50 |
+
else:
|
| 51 |
+
self._apply_unified_diff(target, diff or "")
|
| 52 |
+
after = target.read_text(encoding="utf-8")
|
| 53 |
+
patch_diff = "".join(
|
| 54 |
+
difflib.unified_diff(
|
| 55 |
+
before.splitlines(True),
|
| 56 |
+
after.splitlines(True),
|
| 57 |
+
fromfile=path,
|
| 58 |
+
tofile=path,
|
| 59 |
+
)
|
| 60 |
+
)
|
| 61 |
+
self.state.patch_diff = patch_diff
|
| 62 |
+
files_touched = self.state.metrics.setdefault("files_touched", [])
|
| 63 |
+
if path not in files_touched:
|
| 64 |
+
files_touched.append(path)
|
| 65 |
+
return {"path": path, "diff": patch_diff}
|
| 66 |
+
|
| 67 |
+
def read_openapi(self) -> str:
|
| 68 |
+
routes = self.state.visible_facts.get("workspace_summary", {}).get("routes", [])
|
| 69 |
+
paths: dict[str, Any] = {}
|
| 70 |
+
for route in routes:
|
| 71 |
+
paths.setdefault(route["path"], {})[route["method"].lower()] = {
|
| 72 |
+
"x-public": bool(route.get("public", False))
|
| 73 |
+
}
|
| 74 |
+
return json.dumps(
|
| 75 |
+
{
|
| 76 |
+
"openapi": "3.1.0",
|
| 77 |
+
"info": {"title": "Generated invoices app", "version": "0.1.0"},
|
| 78 |
+
"paths": paths,
|
| 79 |
+
},
|
| 80 |
+
indent=2,
|
| 81 |
+
sort_keys=True,
|
| 82 |
+
)
|
| 83 |
+
|
| 84 |
+
def send_local_request(self, method: str, path: str, user_id: str | None = None) -> dict[str, Any]:
|
| 85 |
+
if not is_local_route(path):
|
| 86 |
+
raise ValueError("send_local_request only accepts local route paths")
|
| 87 |
+
return simulate_request(self.state, method, path, user_id)
|
| 88 |
+
|
| 89 |
+
def compare_identities(
|
| 90 |
+
self,
|
| 91 |
+
method: str,
|
| 92 |
+
path: str,
|
| 93 |
+
first_user_id: str,
|
| 94 |
+
second_user_id: str,
|
| 95 |
+
) -> dict[str, Any]:
|
| 96 |
+
if not is_local_route(path):
|
| 97 |
+
raise ValueError("compare_identities only accepts local route paths")
|
| 98 |
+
return {
|
| 99 |
+
"first": simulate_request(self.state, method, path, first_user_id),
|
| 100 |
+
"second": simulate_request(self.state, method, path, second_user_id),
|
| 101 |
+
}
|
| 102 |
+
|
| 103 |
+
def _resolve_path(self, path: str, *, write: bool = False) -> Path:
    """Map ``path`` into the workspace, enforcing the sandbox allow-list.

    Raises ValueError with the validator's reason when access is denied.
    """
    allowed, normalized_or_error = is_path_allowed(self.state, path, write=write)
    if allowed:
        return self.workspace / normalized_or_error
    # On rejection the second tuple element carries the human-readable reason.
    raise ValueError(normalized_or_error)
|
| 108 |
+
|
| 109 |
+
def _apply_unified_diff(self, path: Path, diff: str) -> None:
|
| 110 |
+
if not diff.strip():
|
| 111 |
+
raise ValueError("diff or content is required")
|
| 112 |
+
original = path.read_text(encoding="utf-8").splitlines(True)
|
| 113 |
+
output: list[str] = []
|
| 114 |
+
old_index = 0
|
| 115 |
+
lines = diff.splitlines(True)
|
| 116 |
+
i = 0
|
| 117 |
+
while i < len(lines):
|
| 118 |
+
line = lines[i]
|
| 119 |
+
if not line.startswith("@@"):
|
| 120 |
+
i += 1
|
| 121 |
+
continue
|
| 122 |
+
old_start = int(line.split()[1].split(",")[0][1:])
|
| 123 |
+
output.extend(original[old_index : old_start - 1])
|
| 124 |
+
old_index = old_start - 1
|
| 125 |
+
i += 1
|
| 126 |
+
while i < len(lines) and not lines[i].startswith("@@"):
|
| 127 |
+
hunk_line = lines[i]
|
| 128 |
+
if hunk_line.startswith(" "):
|
| 129 |
+
output.append(original[old_index])
|
| 130 |
+
old_index += 1
|
| 131 |
+
elif hunk_line.startswith("-"):
|
| 132 |
+
old_index += 1
|
| 133 |
+
elif hunk_line.startswith("+"):
|
| 134 |
+
output.append(hunk_line[1:])
|
| 135 |
+
elif hunk_line.startswith("\\"):
|
| 136 |
+
pass
|
| 137 |
+
i += 1
|
| 138 |
+
output.extend(original[old_index:])
|
| 139 |
+
path.write_text("".join(output), encoding="utf-8")
|
server/authz_oracle.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Policy-oracle matrix for deterministic authorization verification."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from dataclasses import dataclass
|
| 6 |
+
from typing import Any
|
| 7 |
+
|
| 8 |
+
try:
|
| 9 |
+
from ..models import CyberSecurityOWASPState
|
| 10 |
+
from ..validators import simulate_request
|
| 11 |
+
except ImportError: # pragma: no cover
|
| 12 |
+
from models import CyberSecurityOWASPState
|
| 13 |
+
from validators import simulate_request
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
@dataclass(frozen=True)
class AuthzOracle:
    """Builds hidden allowed/denied tuples from policy facts."""

    def matrix(self, state: CyberSecurityOWASPState) -> list[dict[str, Any]]:
        """Expand the hidden fixture facts into concrete GET check cases."""
        hidden = state.hidden_facts
        # (name, path, user_id, expected_status, allowed) — all requests GET.
        rows = (
            (
                "owner_can_read_owned_invoice",
                f"/invoices/{hidden['owner_invoice_id']}",
                hidden["owner_user_id"],
                200,
                True,
            ),
            (
                "resource_owner_can_read_own_invoice",
                f"/invoices/{hidden['other_invoice_id']}",
                hidden["intruder_user_id"],
                200,
                True,
            ),
            (
                "billing_admin_can_read_same_tenant_invoice",
                f"/invoices/{hidden['other_invoice_id']}",
                hidden["admin_user_id"],
                200,
                True,
            ),
            (
                "same_tenant_non_owner_denied",
                f"/invoices/{hidden['other_invoice_id']}",
                hidden["owner_user_id"],
                403,
                False,
            ),
            (
                "cross_tenant_admin_denied",
                f"/invoices/{hidden['foreign_invoice_id']}",
                hidden["admin_user_id"],
                403,
                False,
            ),
            ("health_remains_public", "/health", None, 200, True),
        )
        return [
            {
                "name": name,
                "method": "GET",
                "path": path,
                "user_id": user_id,
                "expected_status": status,
                "allowed": allowed,
            }
            for name, path, user_id, status, allowed in rows
        ]

    def evaluate(self, state: CyberSecurityOWASPState) -> dict[str, Any]:
        """Run every matrix case and report per-case plus overall pass/fail."""
        cases = []
        for case in self.matrix(state):
            response = simulate_request(
                state,
                str(case["method"]),
                str(case["path"]),
                case.get("user_id"),
            )
            actual = int(response["status"])
            expected = int(case["expected_status"])
            cases.append(
                {
                    "name": case["name"],
                    "allowed": bool(case["allowed"]),
                    "expected_status": expected,
                    "actual_status": actual,
                    "passed": actual == expected,
                }
            )
        return {"passed": all(case["passed"] for case in cases), "cases": cases}
|
server/curriculum.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Runtime curriculum controller for closed-loop scenario selection."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from collections import defaultdict, deque
|
| 6 |
+
from dataclasses import dataclass, field
|
| 7 |
+
from typing import Any
|
| 8 |
+
|
| 9 |
+
try:
|
| 10 |
+
from ..models import CyberSecurityOWASPState
|
| 11 |
+
except ImportError: # pragma: no cover
|
| 12 |
+
from models import CyberSecurityOWASPState
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# Ordered difficulty labels; index == numeric difficulty level.
DIFFICULTY_TIERS = ("warmup", "beginner", "intermediate", "advanced", "expert")
# Bounded set of weakness categories the curriculum may target.
WEAKNESS_TARGETS = (
    "same_role_cross_object",
    "cross_tenant_boundary",
    "public_route_overlock",
    "alternate_route_same_service",
    "visible_test_edge_case",
)


@dataclass
class CurriculumController:
    """Tracks episode outcomes and picks the next bounded weakness target."""

    # Number of recent episodes whose rewards feed the moving average.
    window_size: int = 10
    # Rolling reward window; resized to ``window_size`` in __post_init__.
    reward_trend: deque[float] = field(default_factory=lambda: deque(maxlen=10))
    outcomes_by_target: dict[str, list[bool]] = field(default_factory=lambda: defaultdict(list))
    failures_by_target: dict[str, int] = field(default_factory=lambda: defaultdict(int))
    episodes_seen: int = 0

    def __post_init__(self) -> None:
        # Bug fix: the default factory hard-codes maxlen=10, which silently
        # ignored a non-default window_size. Rebuild the deque so the rolling
        # reward window actually matches the configured size.
        if self.reward_trend.maxlen != self.window_size:
            self.reward_trend = deque(self.reward_trend, maxlen=self.window_size)

    def select_profile(
        self,
        *,
        seed: int,
        split: str = "train",
        requested_difficulty: int = 0,
    ) -> dict[str, Any]:
        """Choose the next scenario profile (difficulty + weakness target).

        The seed-derived target is overridden by the most-failed target once
        any failures have been recorded, focusing training on weaknesses.
        """
        difficulty = self._difficulty_for_split(split, requested_difficulty)
        target = self._target_for_seed(seed, split)
        if self.failures_by_target:
            # Most failures wins; ties break toward the earliest-listed target.
            target = max(
                WEAKNESS_TARGETS,
                key=lambda item: (self.failures_by_target.get(item, 0), -WEAKNESS_TARGETS.index(item)),
            )
        return {
            "difficulty": difficulty,
            "difficulty_tier": DIFFICULTY_TIERS[min(difficulty, len(DIFFICULTY_TIERS) - 1)],
            "target_weakness": target,
            "split": split,
            "episodes_seen": self.episodes_seen,
            "recent_reward_mean": self._recent_reward_mean(),
            "mastery": self.mastery_snapshot(),
        }

    def record_episode(self, state: CyberSecurityOWASPState) -> dict[str, Any]:
        """Fold one finished episode into the mastery/failure statistics."""
        target = state.target_weakness or "same_role_cross_object"
        success = bool(state.success)
        self.episodes_seen += 1
        self.outcomes_by_target[target].append(success)
        if not success:
            self.failures_by_target[target] += 1
        self.reward_trend.append(float(state.last_reward or 0.0))
        return self.mastery_snapshot()

    def mastery_snapshot(self) -> dict[str, Any]:
        """Return per-target success rates plus global episode/reward stats."""
        target_mastery = {}
        for target in WEAKNESS_TARGETS:
            outcomes = self.outcomes_by_target.get(target, [])
            target_mastery[target] = {
                "episodes": len(outcomes),
                # max(1, ...) avoids division by zero for unseen targets.
                "success_rate": sum(1 for item in outcomes if item) / max(1, len(outcomes)),
                "failures": self.failures_by_target.get(target, 0),
            }
        return {
            "episodes_seen": self.episodes_seen,
            "recent_reward_mean": self._recent_reward_mean(),
            "target_mastery": target_mastery,
        }

    def _difficulty_for_split(self, split: str, requested_difficulty: int) -> int:
        """Clamp the requested difficulty; hidden_eval floors at tier 3, and
        sustained high recent reward bumps training difficulty by one."""
        difficulty = max(0, min(int(requested_difficulty), len(DIFFICULTY_TIERS) - 1))
        if split == "hidden_eval":
            return max(3, difficulty)
        if self.episodes_seen >= self.window_size and self._recent_reward_mean() > 10.0:
            return min(difficulty + 1, len(DIFFICULTY_TIERS) - 1)
        return difficulty

    def _target_for_seed(self, seed: int, split: str) -> str:
        """Deterministically rotate targets by seed; hidden_eval is offset."""
        offset = 2 if split == "hidden_eval" else 0
        return WEAKNESS_TARGETS[(int(seed) + offset) % len(WEAKNESS_TARGETS)]

    def _recent_reward_mean(self) -> float:
        """Mean of the rolling reward window (0.0 when empty)."""
        if not self.reward_trend:
            return 0.0
        return sum(self.reward_trend) / len(self.reward_trend)
|
server/episode_logger.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Episode artifact logging for training, debugging, and demos."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import json
|
| 6 |
+
import os
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import Any
|
| 9 |
+
|
| 10 |
+
try:
|
| 11 |
+
from ..models import CyberSecurityOWASPState
|
| 12 |
+
except ImportError: # pragma: no cover
|
| 13 |
+
from models import CyberSecurityOWASPState
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class EpisodeArtifactLogger:
    """Appends compact JSONL episode transcripts under outputs/rollouts."""

    def __init__(self, output_path: str | Path | None = None):
        # Explicit argument wins, then the env var, then the repo default.
        destination = output_path or os.getenv("CYBERSECURITY_OWASP_EPISODE_LOG")
        if not destination:
            destination = "outputs/rollouts/episodes.jsonl"
        self.output_path = Path(destination)

    def log_episode(
        self,
        state: CyberSecurityOWASPState,
        *,
        final_observation: dict[str, Any] | None = None,
    ) -> Path:
        """Append one key-sorted JSONL record for ``state``; return the path."""
        self.output_path.parent.mkdir(parents=True, exist_ok=True)
        safety_violations = [
            flag
            for flag in state.anti_cheat_flags
            if "network" in flag or "unsafe" in flag
        ]
        record = {
            "episode_id": state.episode_id,
            "task_id": state.task_id,
            "seed": state.seed,
            "split": state.split,
            "difficulty": state.difficulty,
            "difficulty_tier": state.difficulty_tier,
            "template_id": state.template_id,
            "scenario_family": state.scenario_family,
            "domain": state.domain,
            "bug_family": state.bug_family,
            "target_weakness": state.target_weakness,
            "agent_actions": state.action_history,
            "observations": state.observation_history,
            "final_observation": final_observation or {},
            "patch_diff": state.patch_diff,
            "visible_test_result": self._verifier_layer(state, "visible"),
            "hidden_test_result": self._verifier_layer(state, "hidden_tests"),
            "oracle_result": self._verifier_layer(state, "oracle_matrix"),
            "regression_result": self._verifier_layer(state, "regression"),
            "reward_breakdown": state.reward_history[-1] if state.reward_history else {},
            "reward_breakdown_by_step": state.reward_history,
            "final_status": "resolved" if state.success else "failed",
            "failure_reason": state.failure_reason,
            "safety_violations": safety_violations,
            "anti_cheat_flags": state.anti_cheat_flags,
            "metrics": state.metrics,
        }
        with self.output_path.open("a", encoding="utf-8") as handle:
            handle.write(json.dumps(record, sort_keys=True) + "\n")
        # Record where the artifact landed so callers can find it later.
        state.episode_artifact_path = str(self.output_path)
        return self.output_path

    def _verifier_layer(self, state: CyberSecurityOWASPState, key: str) -> Any:
        """Pull one layer out of the verification summary (None if absent)."""
        return (state.verification_summary or {}).get(key)
server/reward_engine.py
CHANGED
|
@@ -5,45 +5,24 @@ from __future__ import annotations
|
|
| 5 |
try:
|
| 6 |
from ..models import CyberSecurityOWASPAction, CyberSecurityOWASPState
|
| 7 |
from ..rewards import compute_reward
|
| 8 |
-
from .
|
| 9 |
-
patch_quality,
|
| 10 |
-
run_hidden_regression_tests,
|
| 11 |
-
run_hidden_security_tests,
|
| 12 |
-
run_public_route_tests,
|
| 13 |
-
run_visible_tests,
|
| 14 |
-
verify_finding,
|
| 15 |
-
)
|
| 16 |
except ImportError: # pragma: no cover
|
| 17 |
from models import CyberSecurityOWASPAction, CyberSecurityOWASPState
|
| 18 |
from rewards import compute_reward
|
| 19 |
-
from
|
| 20 |
-
patch_quality,
|
| 21 |
-
run_hidden_regression_tests,
|
| 22 |
-
run_hidden_security_tests,
|
| 23 |
-
run_public_route_tests,
|
| 24 |
-
run_visible_tests,
|
| 25 |
-
verify_finding,
|
| 26 |
-
)
|
| 27 |
|
| 28 |
|
| 29 |
def evaluate_action(
|
| 30 |
state: CyberSecurityOWASPState,
|
| 31 |
action: CyberSecurityOWASPAction,
|
| 32 |
anti_cheat_flags: list[str] | None = None,
|
|
|
|
|
|
|
| 33 |
) -> tuple[dict, dict[str, float]]:
|
| 34 |
-
verifier_result
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
verifier_result.update(
|
| 41 |
-
{
|
| 42 |
-
"visible": run_visible_tests(state),
|
| 43 |
-
"security": run_hidden_security_tests(state),
|
| 44 |
-
"regression": run_hidden_regression_tests(state),
|
| 45 |
-
"public_routes": run_public_route_tests(state),
|
| 46 |
-
"patch_quality": patch_quality(state),
|
| 47 |
-
}
|
| 48 |
-
)
|
| 49 |
return verifier_result, compute_reward(state, action, verifier_result)
|
|
|
|
| 5 |
try:
|
| 6 |
from ..models import CyberSecurityOWASPAction, CyberSecurityOWASPState
|
| 7 |
from ..rewards import compute_reward
|
| 8 |
+
from .verifier import MultiLayerVerifier
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
except ImportError: # pragma: no cover
|
| 10 |
from models import CyberSecurityOWASPAction, CyberSecurityOWASPState
|
| 11 |
from rewards import compute_reward
|
| 12 |
+
from server.verifier import MultiLayerVerifier
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
def evaluate_action(
    state: CyberSecurityOWASPState,
    action: CyberSecurityOWASPAction,
    anti_cheat_flags: list[str] | None = None,
    *,
    invalid_action: bool = False,
) -> tuple[dict, dict[str, float]]:
    """Verify one agent action and score it.

    Delegates layer checks to MultiLayerVerifier, then derives the reward
    breakdown from the verifier result. Returns (verifier_result, rewards).
    """
    verifier = MultiLayerVerifier()
    verifier_result = verifier.evaluate_action(
        state,
        action,
        anti_cheat_flags,
        invalid_action=invalid_action,
    )
    reward_breakdown = compute_reward(state, action, verifier_result)
    return verifier_result, reward_breakdown
|
server/scenario_factory.py
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Closed-loop scenario factory for CyberSecurity_OWASP."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import os
|
| 6 |
+
import tempfile
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
from typing import Any
|
| 9 |
+
from uuid import uuid4
|
| 10 |
+
|
| 11 |
+
try:
|
| 12 |
+
from ..fixture_generator import visible_workspace_summary
|
| 13 |
+
from ..policy_graph import build_invoice_policy
|
| 14 |
+
from ..template_renderer import render_fastapi_basic
|
| 15 |
+
from .adversarial_designer import BoundedAdversarialDesigner
|
| 16 |
+
except ImportError: # pragma: no cover
|
| 17 |
+
from fixture_generator import visible_workspace_summary
|
| 18 |
+
from policy_graph import build_invoice_policy
|
| 19 |
+
from template_renderer import render_fastapi_basic
|
| 20 |
+
from server.adversarial_designer import BoundedAdversarialDesigner
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def _make_workspace(prefix: str) -> Path:
|
| 24 |
+
root = Path(os.getenv("CYBERSECURITY_OWASP_WORKSPACE_ROOT", tempfile.gettempdir()))
|
| 25 |
+
root.mkdir(parents=True, exist_ok=True)
|
| 26 |
+
for _ in range(100):
|
| 27 |
+
workspace = root / f"{prefix}{uuid4().hex[:12]}"
|
| 28 |
+
try:
|
| 29 |
+
workspace.mkdir()
|
| 30 |
+
except FileExistsError:
|
| 31 |
+
continue
|
| 32 |
+
return workspace
|
| 33 |
+
raise RuntimeError("Unable to create isolated scenario workspace")
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def _visible_policy_hint(public_hint: dict[str, Any]) -> dict[str, Any]:
|
| 37 |
+
"""Return partial policy observability without hidden oracle/test labels."""
|
| 38 |
+
|
| 39 |
+
return {
|
| 40 |
+
"domain": public_hint.get("domain", "invoices"),
|
| 41 |
+
"policy_rules": list(public_hint.get("policy_rules", [])),
|
| 42 |
+
"fixture_aliases": {
|
| 43 |
+
"users": dict(public_hint.get("users", {})),
|
| 44 |
+
"resources": dict(public_hint.get("resources", {})),
|
| 45 |
+
},
|
| 46 |
+
"public_routes": list(public_hint.get("public_routes", [])),
|
| 47 |
+
"observation_contract": {
|
| 48 |
+
"visible": [
|
| 49 |
+
"product policy summary",
|
| 50 |
+
"fixture aliases needed for local requests",
|
| 51 |
+
"route summaries",
|
| 52 |
+
"visible test results",
|
| 53 |
+
],
|
| 54 |
+
"hidden": [
|
| 55 |
+
"evaluator-only policy tuples",
|
| 56 |
+
"withheld invariant checks",
|
| 57 |
+
"withheld scenario labels",
|
| 58 |
+
"held-out family label",
|
| 59 |
+
],
|
| 60 |
+
},
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class ScenarioFactory:
    """Compiles deterministic local app scenarios from curriculum profiles."""

    def __init__(self, designer: BoundedAdversarialDesigner | None = None):
        # Allow injection for tests; default to the bounded designer.
        self.designer = designer or BoundedAdversarialDesigner()

    def compile_scenario(
        self,
        seed: int,
        *,
        split: str = "train",
        difficulty: int = 0,
        curriculum_profile: dict[str, Any] | None = None,
    ) -> dict[str, Any]:
        """Build one deterministic scenario bundle for ``seed``/``split``.

        Returns agent-visible hint/summary plus evaluator-only hidden facts.
        """
        active_profile = curriculum_profile or {
            "difficulty": difficulty,
            "difficulty_tier": "warmup",
            "target_weakness": "same_role_cross_object",
        }
        spec = self.designer.design(
            seed=seed, split=split, curriculum_profile=active_profile
        )
        policy = build_invoice_policy(seed)
        scenario_dir = _make_workspace(prefix=f"cybersecurity_owasp_{split}_{seed}_")
        hint = _visible_policy_hint(policy.public_hint)
        files = render_fastapi_basic(scenario_dir, hint, policy.hidden_facts)

        summary = visible_workspace_summary(files, hint)
        summary["template_id"] = spec["template_id"]
        summary["target_weakness"] = spec["target_weakness"]

        hidden = dict(policy.hidden_facts)
        hidden["workspace"] = str(scenario_dir)
        hidden["editable_files"] = files
        # NOTE(review): despite the key name, this stores the full file text
        # (not digests) so later diffs can be reconstructed — confirm intent.
        hidden["initial_file_hashes"] = {
            rel_path: (scenario_dir / rel_path).read_text(encoding="utf-8")
            for rel_path in files
        }
        hidden["adversarial_spec"] = spec
        hidden["scenario_family"] = spec["scenario_family"]
        hidden["template_id"] = spec["template_id"]
        hidden["target_weakness"] = spec["target_weakness"]
        hidden["oracle_hidden_focus"] = spec["hidden_focus"]

        return {
            "task_id": f"{split}-invoices-bola-{seed}",
            "workspace": scenario_dir,
            "domain": spec["domain"],
            "bug_family": spec["bug_family"],
            "scenario_family": spec["scenario_family"],
            "template_id": spec["template_id"],
            "target_weakness": spec["target_weakness"],
            "difficulty": int(active_profile.get("difficulty", difficulty)),
            "difficulty_tier": str(active_profile.get("difficulty_tier", "warmup")),
            "curriculum_snapshot": active_profile,
            "task_brief": (
                "Inspect the generated invoices app and policy. Find the broken "
                "authorization behavior, submit a finding with local evidence, patch "
                "the app, preserve intended owner/admin/public behavior, then submit."
            ),
            "public_hint": hint,
            "workspace_summary": summary,
            "hidden_facts": hidden,
        }
|
server/verifier.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Multi-layer deterministic verifier for CyberSecurity_OWASP."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import json
|
| 6 |
+
from dataclasses import dataclass
|
| 7 |
+
from typing import Any
|
| 8 |
+
|
| 9 |
+
try:
|
| 10 |
+
from ..models import CyberSecurityOWASPAction, CyberSecurityOWASPState
|
| 11 |
+
from ..validators import (
|
| 12 |
+
patch_quality,
|
| 13 |
+
run_hidden_regression_tests,
|
| 14 |
+
run_hidden_security_tests,
|
| 15 |
+
run_public_route_tests,
|
| 16 |
+
run_visible_tests,
|
| 17 |
+
verify_finding,
|
| 18 |
+
)
|
| 19 |
+
from .authz_oracle import AuthzOracle
|
| 20 |
+
except ImportError: # pragma: no cover
|
| 21 |
+
from models import CyberSecurityOWASPAction, CyberSecurityOWASPState
|
| 22 |
+
from validators import (
|
| 23 |
+
patch_quality,
|
| 24 |
+
run_hidden_regression_tests,
|
| 25 |
+
run_hidden_security_tests,
|
| 26 |
+
run_public_route_tests,
|
| 27 |
+
run_visible_tests,
|
| 28 |
+
verify_finding,
|
| 29 |
+
)
|
| 30 |
+
from server.authz_oracle import AuthzOracle
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
@dataclass
class MultiLayerVerifier:
    """Aggregates visible, hidden, oracle, regression, and patch-quality checks."""

    # AuthzOracle is a frozen dataclass with no fields, so sharing one default
    # instance across verifier instances is safe.
    oracle: AuthzOracle = AuthzOracle()

    def evaluate_action(
        self,
        state: CyberSecurityOWASPState,
        action: CyberSecurityOWASPAction,
        anti_cheat_flags: list[str] | None = None,
        *,
        invalid_action: bool = False,
    ) -> dict[str, Any]:
        """Check one action, attaching whichever verifier layer it triggers."""
        result: dict[str, Any] = {
            "anti_cheat_flags": anti_cheat_flags or [],
            "invalid_action": invalid_action,
            "repeated_action": self._is_repeated_action(state, action),
        }
        tool = action.tool_name
        if tool == "submit_fix":
            # Terminal action: run the full battery of checks.
            result.update(self.run_terminal_checks(state))
        elif tool == "submit_finding":
            result["finding"] = verify_finding(state, action.arguments)
        elif tool == "run_visible_tests":
            result["visible"] = run_visible_tests(state)
        return result

    def run_terminal_checks(self, state: CyberSecurityOWASPState) -> dict[str, Any]:
        """Run every terminal verification layer and collect the outcomes."""
        # Security results are published under both keys for compatibility.
        security = run_hidden_security_tests(state)
        checks: dict[str, Any] = {"visible": run_visible_tests(state)}
        checks["hidden_tests"] = security
        checks["security"] = security
        checks["oracle_matrix"] = self.oracle.evaluate(state)
        checks["regression"] = run_hidden_regression_tests(state)
        checks["public_routes"] = run_public_route_tests(state)
        checks["patch_quality"] = patch_quality(state)
        return checks

    def public_summary(self, verifier_result: dict[str, Any]) -> dict[str, Any]:
        """Return verifier fields that are safe for state/debug summaries."""
        # JSON round-trip yields a deep, detached copy of plain data.
        return json.loads(json.dumps(verifier_result))

    def _is_repeated_action(
        self, state: CyberSecurityOWASPState, action: CyberSecurityOWASPAction
    ) -> bool:
        """True when this exact tool/arguments pair already occurred twice."""
        signature = {"tool_name": action.tool_name, "arguments": action.arguments}
        return state.action_history.count(signature) > 1
|
tests/test_closed_loop_runtime.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
from CyberSecurity_OWASP.models import CyberSecurityOWASPAction
|
| 5 |
+
from CyberSecurity_OWASP.server.adversarial_designer import BoundedAdversarialDesigner
|
| 6 |
+
from CyberSecurity_OWASP.server.authz_oracle import AuthzOracle
|
| 7 |
+
from CyberSecurity_OWASP.server.curriculum import CurriculumController
|
| 8 |
+
from CyberSecurity_OWASP.server.verifier import MultiLayerVerifier
|
| 9 |
+
|
| 10 |
+
from .helpers import apply_secure_patch, make_env, submit_valid_finding
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def test_curriculum_selects_profile_and_tracks_mastery():
    controller = CurriculumController()
    chosen = controller.select_profile(seed=3, split="train", requested_difficulty=1)

    assert chosen["difficulty_tier"] == "beginner"
    assert chosen["target_weakness"]
    assert "target_mastery" in chosen["mastery"]

    environment = make_env(70)
    controller.record_episode(environment.state)
    assert controller.mastery_snapshot()["episodes_seen"] == 1
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def test_adversarial_designer_marks_hidden_eval_as_heldout_family():
    profile = {"target_weakness": "cross_tenant_boundary"}
    spec = BoundedAdversarialDesigner().design(
        seed=4,
        split="hidden_eval",
        curriculum_profile=profile,
    )

    assert spec["target_weakness"] == "cross_tenant_boundary"
    assert spec["scenario_family"].startswith("heldout.")
    assert spec["safe_lab_only"] is True
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def test_reset_records_scenario_family_and_partial_observability():
    environment = make_env(71)
    observation = environment.reset(seed=71, split="hidden_eval", difficulty=1)
    hint_text = json.dumps(observation.visible_policy_hint).lower()

    assert environment.state.scenario_family.startswith("heldout.")
    assert environment.state.difficulty_tier in {"advanced", "expert"}
    # None of the evaluator-only vocabulary may leak into the visible hint.
    for leaked in ("oracle_matrix", "hidden_tests", "injected bug"):
        assert leaked not in hint_text
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def test_authz_oracle_fails_vulnerable_app_and_passes_secure_patch():
    environment = make_env(72)
    oracle = AuthzOracle()

    # The freshly generated app contains the injected authz bug.
    assert oracle.evaluate(environment.state)["passed"] is False

    submit_valid_finding(environment)
    apply_secure_patch(environment)
    assert oracle.evaluate(environment.state)["passed"] is True
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def test_multilayer_verifier_aggregates_terminal_layers():
    environment = make_env(73)
    submit_valid_finding(environment)
    apply_secure_patch(environment)

    checks = MultiLayerVerifier().run_terminal_checks(environment.state)
    layers = (
        "visible",
        "hidden_tests",
        "oracle_matrix",
        "regression",
        "public_routes",
        "patch_quality",
    )
    for layer in layers:
        assert checks[layer]["passed"] is True
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def test_solved_episode_writes_jsonl_artifact_with_verifier_fields():
    environment = make_env(74)
    submit_valid_finding(environment)
    apply_secure_patch(environment)
    environment.step(CyberSecurityOWASPAction(tool_name="run_visible_tests"))
    final = environment.step(CyberSecurityOWASPAction(tool_name="submit_fix"))
    assert final.done is True

    artifact = Path(environment.state.episode_artifact_path or "")
    assert artifact.exists()
    last_line = artifact.read_text(encoding="utf-8").splitlines()[-1]
    record = json.loads(last_line)
    assert record["episode_id"] == environment.state.episode_id
    assert record["final_status"] == "resolved"
    assert record["hidden_test_result"]["passed"] is True
    assert record["oracle_result"]["passed"] is True
    assert record["reward_breakdown"]["total"] >= 12.0
|
tests/test_web_interface.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
from fastapi.testclient import TestClient
|
| 2 |
|
| 3 |
-
from server.app import app
|
| 4 |
|
| 5 |
|
| 6 |
def test_space_root_redirects_to_openenv_web_ui():
|
|
|
|
| 1 |
from fastapi.testclient import TestClient
|
| 2 |
|
| 3 |
+
from CyberSecurity_OWASP.server.app import app
|
| 4 |
|
| 5 |
|
| 6 |
def test_space_root_redirects_to_openenv_web_ui():
|
training/rollout.py
CHANGED
|
@@ -67,6 +67,11 @@ def rollout_once(trainer, env, tokenizer=None, dataset_prompt: str = "", max_ste
|
|
| 67 |
|
| 68 |
final_breakdown = getattr(observation, "reward_breakdown", {}) or {}
|
| 69 |
state = env.state if not callable(getattr(env, "state", None)) else env.state()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
return {
|
| 71 |
"prompt_ids": prompt_ids,
|
| 72 |
"completion_ids": completion_ids,
|
|
@@ -79,6 +84,16 @@ def rollout_once(trainer, env, tokenizer=None, dataset_prompt: str = "", max_ste
|
|
| 79 |
"reward_anti_cheat": float(final_breakdown.get("anti_cheat", 0.0)),
|
| 80 |
"success": bool(getattr(state, "success", False)),
|
| 81 |
"episode_length": len(action_trace),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
"actions": action_trace,
|
| 83 |
"observations": observation_trace,
|
| 84 |
}
|
|
|
|
| 67 |
|
| 68 |
final_breakdown = getattr(observation, "reward_breakdown", {}) or {}
|
| 69 |
state = env.state if not callable(getattr(env, "state", None)) else env.state()
|
| 70 |
+
verifier = getattr(state, "verification_summary", {}) or {}
|
| 71 |
+
anti_cheat_flags = getattr(state, "anti_cheat_flags", []) or []
|
| 72 |
+
invalid_actions = [
|
| 73 |
+
obs for obs in observation_trace if obs.get("last_action_valid") is False
|
| 74 |
+
]
|
| 75 |
return {
|
| 76 |
"prompt_ids": prompt_ids,
|
| 77 |
"completion_ids": completion_ids,
|
|
|
|
| 84 |
"reward_anti_cheat": float(final_breakdown.get("anti_cheat", 0.0)),
|
| 85 |
"success": bool(getattr(state, "success", False)),
|
| 86 |
"episode_length": len(action_trace),
|
| 87 |
+
"exploit_blocked": bool((verifier.get("security") or {}).get("passed", False)),
|
| 88 |
+
"regression_preserved": bool((verifier.get("regression") or {}).get("passed", False)),
|
| 89 |
+
"public_routes_preserved": bool((verifier.get("public_routes") or {}).get("passed", False)),
|
| 90 |
+
"anti_cheat_pass": not bool(anti_cheat_flags),
|
| 91 |
+
"invalid_action_rate": len(invalid_actions) / max(1, len(action_trace)),
|
| 92 |
+
"timeout": getattr(state, "failure_reason", None) == "max_steps_exceeded",
|
| 93 |
+
"safety_violation": bool(
|
| 94 |
+
any("network" in flag or "unsafe" in flag for flag in anti_cheat_flags)
|
| 95 |
+
),
|
| 96 |
+
"episode_artifact_path": getattr(state, "episode_artifact_path", None),
|
| 97 |
"actions": action_trace,
|
| 98 |
"observations": observation_trace,
|
| 99 |
}
|