Commit ·
f1a1961
0
Parent(s):
feat: Add CloudSecurityAuditor OpenEnv environment
Browse files- .gitignore +6 -0
- Dockerfile +17 -0
- README.md +64 -0
- openenv.yaml +24 -0
- requirements.txt +5 -0
- scripts/baseline_inference.py +67 -0
- server/app.py +15 -0
- server/environment.py +136 -0
- server/models.py +38 -0
- server/tasks.py +28 -0
.gitignore
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.pyc
|
| 3 |
+
.ipynb_checkpoints/
|
| 4 |
+
venv/
|
| 5 |
+
.env
|
| 6 |
+
.DS_Store
|
Dockerfile
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.10-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
# Copy requirement files first
|
| 6 |
+
COPY requirements.txt .
|
| 7 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 8 |
+
|
| 9 |
+
# Copy source code
|
| 10 |
+
COPY server/ ./server/
|
| 11 |
+
COPY openenv.yaml .
|
| 12 |
+
|
| 13 |
+
# Expose the API port
|
| 14 |
+
EXPOSE 8000
|
| 15 |
+
|
| 16 |
+
# Start server
|
| 17 |
+
CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000"]
|
README.md
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CloudSecurityAuditor OpenEnv
|
| 2 |
+
|
| 3 |
+
A standardized AI agent environment for simulating real-world cloud security audits. Built using the **OpenEnv** specification, it allows agents to interact with a mock cloud infrastructure to identify and remediate vulnerabilities.
|
| 4 |
+
|
| 5 |
+
## 🌟 Key Features
|
| 6 |
+
- **Typed Models**: Typed dataclass models for actions, observations, and environment state.
|
| 7 |
+
- **Three Task Tiers**: Includes Easy (Information Gathering), Medium (Remediation), and Hard (Forensic Analysis).
|
| 8 |
+
- **Gymnasium-Style API**: Implements `reset()`, `step()`, and `state()` methods; `step()` returns a single observation that carries the `reward` and `done` fields.
|
| 9 |
+
- **Reward-Driven**: Scalar rewards from 0.0 to 1.0 based on task completion.
|
| 10 |
+
|
| 11 |
+
## 🛠 Action Space
|
| 12 |
+
The agent can perform the following actions via the `step()` method:
|
| 13 |
+
|
| 14 |
+
- **`list`**: Lists resources of a specific type (`s3`, `ec2`).
|
| 15 |
+
- **`describe`**: Fetches detailed configuration for a specific resource ID.
|
| 16 |
+
- **`modify`**: Updates resource configurations (e.g., security groups).
|
| 17 |
+
- **`logs`**: Retrieves logs for a specific resource or service.
|
| 18 |
+
- **`submit`**: Submits the final answer for the evaluation tasks.
|
| 19 |
+
|
| 20 |
+
## 📊 Observation Space
|
| 21 |
+
Each step returns a `CloudObservation` containing:
|
| 22 |
+
- `resources`: A list of discovered resource records.
|
| 23 |
+
- `details`: Metadata for a specific resource.
|
| 24 |
+
- `logs`: Relevant log entries.
|
| 25 |
+
- `status`: Human-readable status message.
|
| 26 |
+
- `info`: Additional environment metadata.
|
| 27 |
+
|
| 28 |
+
## 📋 Tasks
|
| 29 |
+
|
| 30 |
+
1. **Easy (S3 Public Audit)**: Identify all public S3 buckets tagged with the `prod` environment (`env: prod`).
|
| 31 |
+
2. **Medium (EC2 Security Patch)**: Find an EC2 instance with RDP port open to the internet and close it.
|
| 32 |
+
3. **Hard (IAM Log Forensic)**: Trace unauthorized actions in `auth-logs` to identify a rogue IP address.
|
| 33 |
+
|
| 34 |
+
## 🚀 Setup & Installation
|
| 35 |
+
|
| 36 |
+
### Local Installation
|
| 37 |
+
```bash
|
| 38 |
+
pip install -r requirements.txt
|
| 39 |
+
```
|
| 40 |
+
|
| 41 |
+
### Running the Server
|
| 42 |
+
```bash
|
| 43 |
+
python -m server.app
|
| 44 |
+
```
|
| 45 |
+
The server will start on `http://localhost:8000`.
|
| 46 |
+
|
| 47 |
+
### Running the Baseline Agent
|
| 48 |
+
```bash
|
| 49 |
+
pip install requests  # needed by the baseline script; not listed in requirements.txt
python scripts/baseline_inference.py
|
| 50 |
+
```
|
| 51 |
+
|
| 52 |
+
## 🐳 Docker Deployment
|
| 53 |
+
To build and run the containerized environment:
|
| 54 |
+
```bash
|
| 55 |
+
docker build -t cloud-security-auditor-env .
|
| 56 |
+
docker run -p 8000:8000 cloud-security-auditor-env
|
| 57 |
+
```
|
| 58 |
+
|
| 59 |
+
## 🤗 Hugging Face Spaces
|
| 60 |
+
This environment is designed to be deployed as an **OpenEnv Space**.
|
| 61 |
+
1. Create a new Space on Hugging Face.
|
| 62 |
+
2. Select **Docker** as the SDK.
|
| 63 |
+
3. Upload the repository contents (including `openenv.yaml` and `Dockerfile`).
|
| 64 |
+
4. Set the `entrypoint` to match the `uvicorn` command in `openenv.yaml`.
|
openenv.yaml
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: cloud-security-auditor
|
| 2 |
+
version: "0.1.0"
|
| 3 |
+
description: "A real-world cloud security audit environment for AI agents."
|
| 4 |
+
hardware:
|
| 5 |
+
tier: "cpu-small"
|
| 6 |
+
vCPU: 2
|
| 7 |
+
RAM: 4Gi
|
| 8 |
+
port: 8000
|
| 9 |
+
entrypoint: "uvicorn server.app:app --host 0.0.0.0 --port 8000"
|
| 10 |
+
tags:
|
| 11 |
+
- security
|
| 12 |
+
- cloud
|
| 13 |
+
- task-based
|
| 14 |
+
evaluation:
|
| 15 |
+
tasks:
|
| 16 |
+
- id: "easy"
|
| 17 |
+
name: "S3 Public Audit"
|
| 18 |
+
difficulty: "easy"
|
| 19 |
+
- id: "medium"
|
| 20 |
+
name: "EC2 Security Patch"
|
| 21 |
+
difficulty: "medium"
|
| 22 |
+
- id: "hard"
|
| 23 |
+
name: "IAM Log Forensic"
|
| 24 |
+
difficulty: "hard"
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openenv-core>=0.1.1
|
| 2 |
+
fastapi
|
| 3 |
+
uvicorn
|
| 4 |
+
pydantic
|
| 5 |
+
python-multipart
|
scripts/baseline_inference.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import requests
|
| 2 |
+
import json
|
| 3 |
+
|
| 4 |
+
BASE_URL = "http://localhost:8000"
|
| 5 |
+
|
| 6 |
+
def run_baseline_audit(task_id="easy"):
    """Run the scripted baseline agent against a running environment server.

    The baseline resets the environment, lists the S3 buckets, keeps the
    public ones whose ``tags.env`` is ``"prod"`` and submits their IDs as a
    comma-separated answer. Progress and the final reward are printed.

    Args:
        task_id: Task identifier sent to ``/reset``. The list/filter/submit
            logic below is specific to the S3 audit ("easy") task.

    Returns:
        None. If any request returns a non-200 status, the failure is
        printed and the run stops early.
    """
    # Bound every HTTP call so a hung server cannot block the script forever.
    request_timeout = 10  # seconds

    def post(path, payload):
        return requests.post(
            f"{BASE_URL}{path}", json=payload, timeout=request_timeout
        )

    print(f"--- Running Baseline for Task: {task_id} ---")

    # 1. Reset environment
    response = post("/reset", {"task_id": task_id})
    if response.status_code != 200:
        print(f"Failed to reset: {response.text}")
        return

    # `or {}` also covers a key that is present but serialized as null.
    obs = response.json().get("observation") or {}
    print(f"Observation Info: {obs.get('info')}")

    # 2. List S3 buckets
    # Note: wrapping in "action" key to avoid collision with 'action' field in CloudAction
    action_payload = {
        "action": {
            "action": "list",
            "resource_type": "s3",
        }
    }
    response = post("/step", action_payload)
    if response.status_code != 200:
        print(f"Failed on step: {response.text}")
        return

    obs = response.json().get("observation") or {}
    resources = obs.get("resources") or []
    print(f"Discovered {len(resources)} S3 buckets.")

    # 3. Logic to identify public prod buckets
    public_prod_buckets = [
        r["id"]
        for r in resources
        if r.get("public") and (r.get("tags") or {}).get("env") == "prod"
    ]
    print(f"Identified Public Prod Buckets: {public_prod_buckets}")

    # 4. Submit answer
    submit_payload = {
        "action": {
            "action": "submit",
            "answer": ",".join(public_prod_buckets),
        }
    }
    response = post("/step", submit_payload)
    # The original parsed this response unconditionally; an error body would
    # have been reported as "Final Reward: 0.0" instead of as a failure.
    if response.status_code != 200:
        print(f"Failed on submit: {response.text}")
        return

    step_result = response.json()
    obs = step_result.get("observation") or {}
    reward = step_result.get("reward", 0.0)
    done = step_result.get("done", False)

    print(f"Final Reward: {reward}")
    print(f"Done: {done}")
    print(f"Info: {obs.get('info')}")
|
| 62 |
+
|
| 63 |
+
if __name__ == "__main__":
    # Script boundary: report any failure (e.g. the server is not running)
    # as a one-line message instead of a traceback.
    try:
        run_baseline_audit("easy")
    except Exception as exc:
        print(f"Error: {exc}")
|
server/app.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from openenv_core.env_server import create_fastapi_app
|
| 2 |
+
from .models import CloudAction, CloudObservation
|
| 3 |
+
from .environment import CloudAuditEnv
|
| 4 |
+
|
| 5 |
+
# One environment instance, created at import time and handed to the app.
env = CloudAuditEnv()

# Build the FastAPI application.
# create_fastapi_app receives the environment instance plus the
# Action/Observation model classes used for typing.
app = create_fastapi_app(env, CloudAction, CloudObservation)

if __name__ == "__main__":
    # Imported lazily so uvicorn is only needed when run as a script.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
|
server/environment.py
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import uuid
|
| 2 |
+
import datetime
|
| 3 |
+
from typing import Optional, Tuple, Dict, Any, List
|
| 4 |
+
from .models import CloudAction, CloudObservation, CloudState, CloudActionType
|
| 5 |
+
|
| 6 |
+
class CloudAuditEnv:
    """In-memory mock cloud account audited through typed actions.

    The environment holds a small, hard-coded inventory (S3 buckets, EC2
    instances, and named log streams) and supports three tasks:

    * ``"easy"``   - submit the IDs of the public S3 buckets tagged prod.
    * ``"medium"`` - modify an EC2 instance so RDP (3389) is no longer open
      to ``0.0.0.0/0``.
    * ``"hard"``   - submit the IP that appears in the auth logs for the
      ``DeleteStorage`` action.

    Solving a task yields a reward of 1.0 and ends the episode; an episode is
    also ended once ``MAX_STEPS`` steps have been taken.
    """

    # Episodes are truncated once this many steps have been processed.
    MAX_STEPS = 20

    # Keys of ``self.resources`` that hold lists of resource records. The
    # "logs" key is excluded: it maps log names to entries and is served by
    # the LOGS action, not by LIST/DESCRIBE.
    _RESOURCE_TYPES = ("s3", "ec2")

    def __init__(self):
        self.task_id = "easy"
        self._initialize_state()

    def _initialize_state(self):
        """Start a fresh episode: new ID, zeroed counters, pristine inventory."""
        self.episode_id = str(uuid.uuid4())
        self.step_count = 0
        self.is_completed = False
        self.score = 0.0

        # Mock Infrastructure
        self.resources = {
            "s3": [
                {"id": "prod-data-001", "region": "us-east-1", "public": True, "tags": {"env": "prod"}},
                {"id": "prod-logs-002", "region": "us-east-1", "public": False, "tags": {"env": "prod"}},
                {"id": "dev-test-01", "region": "us-west-2", "public": True, "tags": {"env": "dev"}},
            ],
            "ec2": [
                {"id": "i-0abcdef1234567890", "type": "t2.micro", "state": "running", "tags": {"env": "dev"},
                 "security_groups": [{"id": "sg-01", "rules": [{"port": 22, "cidr": "0.0.0.0/0"}, {"port": 3389, "cidr": "0.0.0.0/0"}]}]},
                {"id": "i-0987654321fedcba0", "type": "m5.large", "state": "running", "tags": {"env": "prod"},
                 "security_groups": [{"id": "sg-02", "rules": [{"port": 443, "cidr": "0.0.0.0/0"}]}]},
            ],
            "logs": {
                "auth-logs": [
                    {"timestamp": "2026-04-05T10:00:00Z", "user": "admin", "action": "Login", "ip": "1.1.1.1"},
                    {"timestamp": "2026-04-05T10:15:00Z", "user": "iam-role-01", "action": "DeleteStorage", "ip": "192.168.1.50"},
                    {"timestamp": "2026-04-05T10:30:00Z", "user": "user-02", "action": "ListBuckets", "ip": "2.2.2.2"},
                ]
            },
        }

    def reset(self, task_id: str = "easy") -> CloudObservation:
        """Select ``task_id`` and start a new episode.

        Required by openenv-core 0.1.1: takes task_id, returns JUST the
        observation.
        """
        self.task_id = task_id
        self._initialize_state()
        return CloudObservation(info=f"Environment reset. Task: {self.task_id}", reward=0.0, done=False)

    def step(self, action: CloudAction) -> CloudObservation:
        """Apply one action and return the resulting observation.

        Required by openenv-core 0.1.1: takes action, returns JUST the
        observation with reward/done fields.

        Once an episode has ended (task solved or step limit reached),
        further steps are not processed: they return ``done=True`` with zero
        reward, so a solved task cannot be re-submitted for extra reward.
        """
        if self.is_completed or self.step_count >= self.MAX_STEPS:
            return CloudObservation(
                status="Episode already finished. Call reset() to start a new one.",
                reward=0.0,
                done=True,
            )

        self.step_count += 1
        obs = CloudObservation()
        solved = False

        if action.action == CloudActionType.LIST:
            self._handle_list(action, obs)
        elif action.action == CloudActionType.DESCRIBE:
            self._handle_describe(action, obs)
        elif action.action == CloudActionType.MODIFY:
            solved = self._handle_modify(action, obs)
        elif action.action == CloudActionType.LOGS:
            self._handle_logs(action, obs)
        elif action.action == CloudActionType.SUBMIT:
            solved = self._handle_submit(action, obs)
        else:
            obs.status = f"Unknown action: {action.action}"

        reward = 1.0 if solved else 0.0
        if solved:
            # Recorded so state() reflects completion.
            self.is_completed = True

        self.score += reward
        obs.reward = reward
        obs.done = solved or self.step_count >= self.MAX_STEPS
        return obs

    def _handle_list(self, action: CloudAction, obs: CloudObservation) -> None:
        """Fill ``obs.resources`` with every record of the requested type."""
        r_type = action.resource_type
        if r_type in self._RESOURCE_TYPES:
            obs.resources = self.resources[r_type]
            obs.status = f"Listed {len(obs.resources)} {r_type} resources."
        else:
            obs.status = f"Unknown resource type: {r_type}"

    def _handle_describe(self, action: CloudAction, obs: CloudObservation) -> None:
        """Fill ``obs.details`` with the record whose ID matches, if any."""
        res_id = action.resource_id
        for r_type in self._RESOURCE_TYPES:
            for record in self.resources[r_type]:
                if record["id"] == res_id:
                    obs.details = record
                    obs.status = f"Described resource {res_id}"
                    return
        obs.status = f"Resource not found: {res_id}"

    def _handle_modify(self, action: CloudAction, obs: CloudObservation) -> bool:
        """Replace the target instance's security-group rules (medium task).

        Returns True when, after the update, no rule leaves port 3389 open
        to ``0.0.0.0/0``.
        """
        res_id = action.resource_id
        patch = action.patch

        # Simple EC2 security group patching for Medium task
        if not (self.task_id == "medium" and res_id == "i-0abcdef1234567890"):
            obs.status = "Action not permitted or invalid resource."
            return False

        new_rules = patch.get("rules") if isinstance(patch, dict) else None
        if not isinstance(new_rules, list):
            # Nothing was changed, so do not claim an update happened.
            obs.status = f"Patch for {res_id} must contain a 'rules' list; nothing was changed."
            return False

        security_groups = self.resources["ec2"][0]["security_groups"]
        for sg in security_groups:
            sg["rules"] = new_rules
        obs.status = f"Updated security groups for {res_id}"

        # .get() keeps a rule without "port"/"cidr" from raising KeyError.
        has_rdp = any(
            isinstance(rule, dict)
            and rule.get("port") == 3389
            and rule.get("cidr") == "0.0.0.0/0"
            for sg in security_groups
            for rule in sg["rules"]
        )
        return not has_rdp

    def _handle_logs(self, action: CloudAction, obs: CloudObservation) -> None:
        """Fill ``obs.logs`` with the entries of the log named by resource_id."""
        log_name = action.resource_id
        if log_name in self.resources["logs"]:
            obs.logs = self.resources["logs"][log_name]
            obs.status = f"Fetched logs for {log_name}"
        else:
            obs.status = f"Logs not found: {log_name}"

    def _handle_submit(self, action: CloudAction, obs: CloudObservation) -> bool:
        """Grade a submitted answer for the easy/hard tasks; True if correct."""
        if self.task_id == "easy":
            # Expecting agent to list public S3 buckets in prod
            if not action.answer:
                obs.info = "No answer provided."
                return False
            # Drop empty pieces so a stray trailing comma is not an error.
            answers = [a.strip() for a in action.answer.split(",") if a.strip()]
            expected = ["prod-data-001"]
            if set(answers) == set(expected):
                obs.info = "Correct! Task completed."
                return True
            obs.info = f"Incorrect list of buckets. Got: {answers}"
            return False

        if self.task_id == "hard":
            # Expecting rogue IP; surrounding whitespace is ignored.
            if (action.answer or "").strip() == "192.168.1.50":
                obs.info = "Correct Rogue IP identified!"
                return True
            obs.info = f"Wrong IP. Got: {action.answer}"
            return False

        obs.info = f"Submit is not used for task '{self.task_id}'."
        return False

    def state(self) -> CloudState:
        """Return a snapshot of the episode bookkeeping."""
        return CloudState(
            episode_id=self.episode_id,
            step_count=self.step_count,
            task_id=self.task_id,
            is_completed=self.is_completed,
            score=self.score,
        )
|
server/models.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from enum import Enum
|
| 2 |
+
from typing import List, Optional, Dict, Any
|
| 3 |
+
from dataclasses import dataclass, field
|
| 4 |
+
|
| 5 |
+
class CloudActionType(str, Enum):
    """Verbs an agent can send to the environment.

    Members are also ``str`` instances, so a raw string such as ``"list"``
    compares equal to ``CloudActionType.LIST``.
    """

    LIST = "list"          # enumerate resources of one type
    DESCRIBE = "describe"  # fetch one resource by ID
    MODIFY = "modify"      # patch a resource's configuration
    LOGS = "logs"          # fetch a named log
    SUBMIT = "submit"      # hand in the final answer
|
| 11 |
+
|
| 12 |
+
@dataclass
class CloudAction:
    """A single request from the agent; only ``action`` is mandatory."""

    # Which verb to perform.
    action: CloudActionType
    # Resource collection name, read by the "list" action.
    resource_type: Optional[str] = None
    # Target resource ID (or log name for the "logs" action).
    resource_id: Optional[str] = None
    # Configuration changes for the "modify" action.
    patch: Optional[Dict[str, Any]] = None
    # Time-window bounds; declared here but not read by the environment
    # code visible alongside this model.
    start_time: Optional[str] = None
    end_time: Optional[str] = None
    # Final answer for the "submit" action.
    answer: Optional[str] = None
|
| 21 |
+
|
| 22 |
+
@dataclass
class CloudObservation:
    """What the environment reports back after ``reset`` or ``step``.

    Every payload field is optional; an action fills in only the ones it
    produces.
    """

    # Records returned by a "list" action.
    resources: Optional[List[Dict[str, Any]]] = None
    # Single record returned by a "describe" action.
    details: Optional[Dict[str, Any]] = None
    # Human-readable outcome of the action.
    status: Optional[str] = None
    # Entries returned by a "logs" action.
    logs: Optional[List[Dict[str, Any]]] = None
    # Extra message, e.g. reset notice or grading feedback.
    info: Optional[str] = None
    reward: float = 0.0  # Required by openenv-core 0.1.1
    done: bool = False   # Required by openenv-core 0.1.1
|
| 31 |
+
|
| 32 |
+
@dataclass
class CloudState:
    """Snapshot of episode bookkeeping; all fields are required."""

    episode_id: str     # unique ID of the current episode
    step_count: int     # steps taken so far in this episode
    task_id: str        # task selected at reset
    is_completed: bool  # completion flag tracked by the environment
    score: float        # reward accumulated in this episode
|
server/tasks.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Dict, List, Any
|
| 2 |
+
|
| 3 |
+
# Task catalogue keyed by task ID. Each entry carries the ID, the
# instructions shown to the agent, a difficulty label, and the reward
# needed to count the task as solved.
TASKS = {
    "easy": {
        "id": "easy",
        # 'prod' is the value of the bucket's env tag, not a region, so the
        # instructions point the agent at the tag.
        "description": "Identify all public S3 buckets tagged with the 'prod' environment (tags.env == 'prod'). Submit their IDs as a comma-separated string.",
        "difficulty": "Easy",
        "reward_threshold": 1.0,
    },
    "medium": {
        "id": "medium",
        "description": "Find the EC2 instance with ID 'i-0abcdef1234567890' in the 'dev' environment. Close port 3389 (RDP) by updating its security group rules to only allow port 22.",
        "difficulty": "Medium",
        "reward_threshold": 1.0,
    },
    "hard": {
        "id": "hard",
        "description": "A rogue IAM role 'iam-role-01' has been performing unauthorized actions. Fetch the 'auth-logs' and identify the IP address that performed 'DeleteStorage'. Submit the IP address.",
        "difficulty": "Hard",
        "reward_threshold": 1.0,
    },
}
|
| 23 |
+
|
| 24 |
+
def get_task_info(task_id: str) -> Dict[str, Any]:
    """Return the catalogue entry for ``task_id``, or ``{}`` if unknown."""
    if task_id in TASKS:
        return TASKS[task_id]
    return {}
|
| 26 |
+
|
| 27 |
+
def list_tasks() -> List[Dict[str, Any]]:
    """Return every catalogue entry as a new list, in definition order."""
    return [*TASKS.values()]
|