iitian committed on
Commit
f1a1961
·
0 Parent(s):

feat: Add CloudSecurityAuditor OpenEnv environment

Browse files
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ .ipynb_checkpoints/
4
+ venv/
5
+ .env
6
+ .DS_Store
Dockerfile ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.10-slim
2
+
3
+ WORKDIR /app
4
+
5
+ # Copy requirement files first
6
+ COPY requirements.txt .
7
+ RUN pip install --no-cache-dir -r requirements.txt
8
+
9
+ # Copy source code
10
+ COPY server/ ./server/
11
+ COPY openenv.yaml .
12
+
13
+ # Expose the API port
14
+ EXPOSE 8000
15
+
16
+ # Start server
17
+ CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000"]
README.md ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CloudSecurityAuditor OpenEnv
2
+
3
+ A standardized AI agent environment for simulating real-world cloud security audits. Built using the **OpenEnv** specification, it allows agents to interact with a mock cloud infrastructure to identify and remediate vulnerabilities.
4
+
5
+ ## 🌟 Key Features
6
+ - **Typed Models**: Actions, observations, and state are defined as typed dataclasses (`CloudAction`, `CloudObservation`, `CloudState`).
7
+ - **Three Task Tiers**: Includes Easy (Information Gathering), Medium (Remediation), and Hard (Forensic Analysis).
8
+ - **Gymnasium-Compatible API**: Implements `step()`, `reset()`, and `state()` methods.
9
+ - **Reward-Driven**: Scalar rewards from 0.0 to 1.0 based on task completion.
10
+
11
+ ## 🛠 Action Space
12
+ The agent can perform the following actions via the `step()` method:
13
+
14
+ - **`list`**: Lists resources of a specific type (`s3`, `ec2`).
15
+ - **`describe`**: Fetches detailed configuration for a specific resource ID.
16
+ - **`modify`**: Updates resource configurations (e.g., security groups).
17
+ - **`logs`**: Retrieves logs for a specific resource or service.
18
+ - **`submit`**: Submits the final answer for the evaluation tasks.
19
+
20
+ ## 📊 Observation Space
21
+ Each step returns a `CloudObservation` containing:
22
+ - `resources`: A list of discovered resource records.
23
+ - `details`: Metadata for a specific resource.
24
+ - `logs`: Relevant log entries.
25
+ - `status`: Human-readable status message.
26
+ - `info`: Additional environment metadata.
27
+
28
+ ## 📋 Tasks
29
+
30
+ 1. **Easy (S3 Public Audit)**: Identify all public S3 buckets tagged with the `prod` environment.
31
+ 2. **Medium (EC2 Security Patch)**: Find an EC2 instance with RDP port open to the internet and close it.
32
+ 3. **Hard (IAM Log Forensic)**: Trace unauthorized actions in `auth-logs` to identify a rogue IP address.
33
+
34
+ ## 🚀 Setup & Installation
35
+
36
+ ### Local Installation
37
+ ```bash
38
+ pip install -r requirements.txt
39
+ ```
40
+
41
+ ### Running the Server
42
+ ```bash
43
+ python -m server.app
44
+ ```
45
+ The server will start on `http://localhost:8000`.
46
+
47
+ ### Running the Baseline Agent
48
+ ```bash
49
+ python scripts/baseline_inference.py
50
+ ```
51
+
52
+ ## 🐳 Docker Deployment
53
+ To build and run the containerized environment:
54
+ ```bash
55
+ docker build -t cloud-security-auditor-env .
56
+ docker run -p 8000:8000 cloud-security-auditor-env
57
+ ```
58
+
59
+ ## 🤗 Hugging Face Spaces
60
+ This environment is designed to be deployed as an **OpenEnv Space**.
61
+ 1. Create a new Space on Hugging Face.
62
+ 2. Select **Docker** as the SDK.
63
+ 3. Upload the repository contents (including `openenv.yaml` and `Dockerfile`).
64
+ 4. Set the `entrypoint` to match the `uvicorn` command in `openenv.yaml`.
openenv.yaml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: cloud-security-auditor
2
+ version: "0.1.0"
3
+ description: "A real-world cloud security audit environment for AI agents."
4
+ hardware:
5
+ tier: "cpu-small"
6
+ vCPU: 2
7
+ RAM: 4Gi
8
+ port: 8000
9
+ entrypoint: "uvicorn server.app:app --host 0.0.0.0 --port 8000"
10
+ tags:
11
+ - security
12
+ - cloud
13
+ - task-based
14
+ evaluation:
15
+ tasks:
16
+ - id: "easy"
17
+ name: "S3 Public Audit"
18
+ difficulty: "easy"
19
+ - id: "medium"
20
+ name: "EC2 Security Patch"
21
+ difficulty: "medium"
22
+ - id: "hard"
23
+ name: "IAM Log Forensic"
24
+ difficulty: "hard"
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ openenv-core>=0.1.1
2
+ fastapi
3
+ uvicorn
4
+ pydantic
5
+ python-multipart
scripts/baseline_inference.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+
4
+ BASE_URL = "http://localhost:8000"
5
+
6
def run_baseline_audit(task_id="easy", timeout=10.0):
    """Run the scripted baseline agent for one task against the local server.

    The agent resets the environment, lists the S3 buckets, keeps those that
    are public and tagged ``env == "prod"``, and submits their IDs as a
    comma-separated answer. Progress and the final reward are printed.

    Args:
        task_id: Task identifier sent to ``/reset``.
        timeout: Seconds to wait for each HTTP request before giving up, so
            a stalled server cannot hang the script forever.

    Returns:
        None. Returns early (after printing the failure) when any request
        answers with a status other than HTTP 200.
    """
    print(f"--- Running Baseline for Task: {task_id} ---")

    # 1. Reset environment
    reset_result = _post_json("/reset", {"task_id": task_id}, timeout, "Failed to reset")
    if reset_result is None:
        return

    # ``or {}`` also covers a field that is present but null in the JSON.
    obs = reset_result.get("observation") or {}
    print(f"Observation Info: {obs.get('info')}")

    # 2. List S3 buckets
    # Note: wrapping in "action" key to avoid collision with 'action' field in CloudAction
    action_payload = {
        "action": {
            "action": "list",
            "resource_type": "s3",
        }
    }
    step_result = _post_json("/step", action_payload, timeout, "Failed on step")
    if step_result is None:
        return

    obs = step_result.get("observation") or {}
    resources = obs.get("resources") or []
    print(f"Discovered {len(resources)} S3 buckets.")

    # 3. Logic to identify public prod buckets
    public_prod_buckets = [
        bucket["id"]
        for bucket in resources
        if bucket.get("public") and (bucket.get("tags") or {}).get("env") == "prod"
    ]
    print(f"Identified Public Prod Buckets: {public_prod_buckets}")

    # 4. Submit answer
    submit_payload = {
        "action": {
            "action": "submit",
            "answer": ",".join(public_prod_buckets),
        }
    }
    step_result = _post_json("/step", submit_payload, timeout, "Failed on step")
    if step_result is None:
        return

    obs = step_result.get("observation") or {}
    reward = step_result.get("reward", 0.0)
    done = step_result.get("done", False)

    print(f"Final Reward: {reward}")
    print(f"Done: {done}")
    print(f"Info: {obs.get('info')}")


def _post_json(path, payload, timeout, failure_prefix):
    """POST *payload* as JSON to ``BASE_URL + path`` and return the decoded body.

    Prints ``"<failure_prefix>: <response text>"`` and returns ``None`` when
    the server answers with anything other than HTTP 200.
    """
    response = requests.post(f"{BASE_URL}{path}", json=payload, timeout=timeout)
    if response.status_code != 200:
        print(f"{failure_prefix}: {response.text}")
        return None
    return response.json()
62
+
63
if __name__ == "__main__":
    # Script entry point: run the easy task and report any failure on
    # stdout instead of letting a traceback escape.
    try:
        run_baseline_audit(task_id="easy")
    except Exception as exc:
        print(f"Error: {exc}")
server/app.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from openenv_core.env_server import create_fastapi_app
2
+ from .models import CloudAction, CloudObservation
3
+ from .environment import CloudAuditEnv
4
+
5
# Environment instance, created once at import time and handed to the
# app factory below.
env = CloudAuditEnv()

# create_fastapi_app expects the environment instance plus the
# Action/Observation models (used for typing) and yields the app object
# that the "server.app:app" uvicorn target refers to.
app = create_fastapi_app(env, CloudAction, CloudObservation)

if __name__ == "__main__":
    # Imported lazily so uvicorn is only loaded when this module is run
    # directly.
    import uvicorn

    uvicorn.run(app, port=8000, host="0.0.0.0")
server/environment.py ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uuid
2
+ import datetime
3
+ from typing import Optional, Tuple, Dict, Any, List
4
+ from .models import CloudAction, CloudObservation, CloudState, CloudActionType
5
+
6
class CloudAuditEnv:
    """In-memory mock of a small cloud account used by the audit tasks.

    The environment holds a few S3 buckets, EC2 instances and an
    ``auth-logs`` stream. ``reset`` selects a task and rebuilds the mock
    state, ``step`` executes one ``CloudAction`` and returns a
    ``CloudObservation`` whose ``reward``/``done`` fields carry the outcome,
    and ``state`` reports the episode bookkeeping.
    """

    # An episode is cut off once this many steps have been taken.
    MAX_STEPS = 20
    # Collections in ``self.resources`` that hold lists of resource records.
    # ("logs" is a dict of log streams and is served by the LOGS action.)
    _RECORD_TYPES = ("s3", "ec2")
    # The only instance the medium task allows MODIFY to patch.
    _MEDIUM_TARGET_ID = "i-0abcdef1234567890"
    # Expected answers for the SUBMIT-graded tasks.
    _EASY_EXPECTED = frozenset({"prod-data-001"})
    _HARD_EXPECTED = "192.168.1.50"

    def __init__(self):
        self.task_id = "easy"
        self._initialize_state()

    def _initialize_state(self):
        """Start a fresh episode: new id, zeroed counters, pristine mock data."""
        self.episode_id = str(uuid.uuid4())
        self.step_count = 0
        self.is_completed = False
        self.score = 0.0

        # Mock Infrastructure
        self.resources = {
            "s3": [
                {"id": "prod-data-001", "region": "us-east-1", "public": True, "tags": {"env": "prod"}},
                {"id": "prod-logs-002", "region": "us-east-1", "public": False, "tags": {"env": "prod"}},
                {"id": "dev-test-01", "region": "us-west-2", "public": True, "tags": {"env": "dev"}},
            ],
            "ec2": [
                {
                    "id": "i-0abcdef1234567890",
                    "type": "t2.micro",
                    "state": "running",
                    "tags": {"env": "dev"},
                    "security_groups": [
                        {
                            "id": "sg-01",
                            "rules": [
                                {"port": 22, "cidr": "0.0.0.0/0"},
                                {"port": 3389, "cidr": "0.0.0.0/0"},
                            ],
                        }
                    ],
                },
                {
                    "id": "i-0987654321fedcba0",
                    "type": "m5.large",
                    "state": "running",
                    "tags": {"env": "prod"},
                    "security_groups": [
                        {"id": "sg-02", "rules": [{"port": 443, "cidr": "0.0.0.0/0"}]}
                    ],
                },
            ],
            "logs": {
                "auth-logs": [
                    {"timestamp": "2026-04-05T10:00:00Z", "user": "admin", "action": "Login", "ip": "1.1.1.1"},
                    {"timestamp": "2026-04-05T10:15:00Z", "user": "iam-role-01", "action": "DeleteStorage", "ip": "192.168.1.50"},
                    {"timestamp": "2026-04-05T10:30:00Z", "user": "user-02", "action": "ListBuckets", "ip": "2.2.2.2"},
                ]
            },
        }

    def reset(self, task_id: str = "easy") -> CloudObservation:
        """Required by openenv-core 0.1.1: takes task_id, returns JUST the observation.

        Selects *task_id* as the active task and rebuilds the episode state.
        """
        self.task_id = task_id
        self._initialize_state()
        return CloudObservation(info=f"Environment reset. Task: {self.task_id}", reward=0.0, done=False)

    def step(self, action: CloudAction) -> CloudObservation:
        """Required by openenv-core 0.1.1: takes action, returns JUST the observation with reward/done fields.

        Executes one action. ``reward`` is 1.0 on the step that solves the
        active task and 0.0 otherwise; ``done`` is set when the task is
        solved or ``MAX_STEPS`` is reached. Once an episode has ended,
        further steps are rejected until ``reset`` is called.
        """
        obs = CloudObservation()

        # A finished episode must not accept more actions; otherwise a
        # repeated correct submission would keep adding to the score.
        if self.is_completed:
            obs.status = "Episode already finished. Call reset() to start a new one."
            obs.done = True
            return obs

        self.step_count += 1
        truncated = self.step_count >= self.MAX_STEPS
        solved = False

        kind = action.action
        if kind == CloudActionType.LIST:
            self._handle_list(action, obs)
        elif kind == CloudActionType.DESCRIBE:
            self._handle_describe(action, obs)
        elif kind == CloudActionType.MODIFY:
            solved = self._handle_modify(action, obs)
        elif kind == CloudActionType.LOGS:
            self._handle_logs(action, obs)
        elif kind == CloudActionType.SUBMIT:
            solved = self._handle_submit(action, obs)
        else:
            obs.status = f"Unsupported action: {kind}"

        reward = 1.0 if solved else 0.0
        self.score += reward
        # Record the end of the episode so state() reflects it.
        self.is_completed = solved or truncated
        obs.reward = reward
        obs.done = self.is_completed
        return obs

    def state(self) -> CloudState:
        """Return the current episode bookkeeping as a ``CloudState``."""
        return CloudState(
            episode_id=self.episode_id,
            step_count=self.step_count,
            task_id=self.task_id,
            is_completed=self.is_completed,
            score=self.score,
        )

    def _find_resource(self, res_id, r_types=None):
        """Return the first record with id *res_id* in *r_types*, or None."""
        for r_type in (r_types or self._RECORD_TYPES):
            for record in self.resources[r_type]:
                if record["id"] == res_id:
                    return record
        return None

    def _handle_list(self, action, obs):
        """LIST: put all records of the requested resource type on *obs*."""
        r_type = action.resource_type
        # Only list-valued collections are listable; "logs" is a dict and
        # would not fit the list-typed ``resources`` field.
        if r_type in self._RECORD_TYPES:
            obs.resources = self.resources[r_type]
            obs.status = f"Listed {len(obs.resources)} {r_type} resources."
        else:
            obs.status = f"Unknown resource type: {r_type}"

    def _handle_describe(self, action, obs):
        """DESCRIBE: put the record matching ``action.resource_id`` on *obs*."""
        res_id = action.resource_id
        record = self._find_resource(res_id)
        if record is None:
            obs.status = f"Resource not found: {res_id}"
        else:
            obs.details = record
            obs.status = f"Described resource {res_id}"

    def _handle_modify(self, action, obs):
        """MODIFY: replace the target instance's security-group rules.

        Only permitted in the medium task and only for the target instance.
        Returns True when, after the patch, no rule leaves port 3389 open
        to 0.0.0.0/0.
        """
        res_id = action.resource_id
        instance = None
        if self.task_id == "medium" and res_id == self._MEDIUM_TARGET_ID:
            instance = self._find_resource(res_id, ("ec2",))
        if instance is None:
            obs.status = "Action not permitted or invalid resource."
            return False

        patch = action.patch
        rules = patch.get("rules") if isinstance(patch, dict) else None
        if not isinstance(rules, list) or not all(isinstance(rule, dict) for rule in rules):
            # Do not claim an update happened when nothing usable was sent.
            obs.status = "Invalid patch: 'rules' must be a list of rule objects."
            return False

        for sg in instance["security_groups"]:
            # Copy so security groups never share one mutable rule list.
            sg["rules"] = [dict(rule) for rule in rules]
        obs.status = f"Updated security groups for {res_id}"

        return not any(
            self._is_open_rdp(rule)
            for sg in instance["security_groups"]
            for rule in sg["rules"]
        )

    @staticmethod
    def _is_open_rdp(rule):
        """Return True if *rule* exposes port 3389 to 0.0.0.0/0."""
        return rule.get("port") == 3389 and rule.get("cidr") == "0.0.0.0/0"

    def _handle_logs(self, action, obs):
        """LOGS: put the log stream named by ``action.resource_id`` on *obs*."""
        log_name = action.resource_id
        if log_name in self.resources["logs"]:
            obs.logs = self.resources["logs"][log_name]
            obs.status = f"Fetched logs for {log_name}"
        else:
            obs.status = f"Logs not found: {log_name}"

    def _handle_submit(self, action, obs):
        """SUBMIT: grade the answer for the easy and hard tasks.

        Returns True when the submitted answer is correct.
        """
        answer = (action.answer or "").strip()

        if self.task_id == "easy":
            # Expecting agent to list public S3 buckets in prod
            if not answer:
                obs.info = "No answer submitted."
                return False
            answers = [part.strip() for part in answer.split(",")]
            if set(answers) == self._EASY_EXPECTED:
                obs.info = "Correct! Task completed."
                return True
            obs.info = f"Incorrect list of buckets. Got: {answers}"
            return False

        if self.task_id == "hard":
            # Expecting rogue IP
            if answer == self._HARD_EXPECTED:
                obs.info = "Correct Rogue IP identified!"
                return True
            obs.info = f"Wrong IP. Got: {action.answer}"
            return False

        obs.info = f"Submit is not used for task '{self.task_id}'."
        return False
server/models.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from enum import Enum
2
+ from typing import List, Optional, Dict, Any
3
+ from dataclasses import dataclass, field
4
+
5
class CloudActionType(str, Enum):
    """Action verbs an agent can send to the environment.

    Members are ``str`` subclasses, so each one compares equal to its
    lowercase wire value (for example ``CloudActionType.LIST == "list"``).
    """

    LIST = "list"          # enumerate resources of one type
    DESCRIBE = "describe"  # fetch one resource by id
    MODIFY = "modify"      # patch a resource's configuration
    LOGS = "logs"          # fetch a named log stream
    SUBMIT = "submit"      # hand in the final answer
11
+
12
@dataclass
class CloudAction:
    """One request from the agent; only ``action`` is mandatory.

    Which of the optional fields matter depends on the action verb.
    """

    action: CloudActionType
    # Resource collection name, read by the "list" action.
    resource_type: Optional[str] = field(default=None)
    # Resource id (or log stream name for the "logs" action).
    resource_id: Optional[str] = field(default=None)
    # Configuration changes, read by the "modify" action.
    patch: Optional[Dict[str, Any]] = field(default=None)
    # Time-window bounds; NOTE(review): not read by the environment code
    # visible alongside this model - confirm intended use.
    start_time: Optional[str] = field(default=None)
    end_time: Optional[str] = field(default=None)
    # Final answer, read by the "submit" action.
    answer: Optional[str] = field(default=None)
21
+
22
@dataclass
class CloudObservation:
    """Result of a reset or step; every field is optional with a default."""

    # Resource records returned by a "list" action.
    resources: Optional[List[Dict[str, Any]]] = field(default=None)
    # Single resource record returned by a "describe" action.
    details: Optional[Dict[str, Any]] = field(default=None)
    # Human-readable outcome of the action.
    status: Optional[str] = field(default=None)
    # Log entries returned by a "logs" action.
    logs: Optional[List[Dict[str, Any]]] = field(default=None)
    # Extra feedback such as reset or grading messages.
    info: Optional[str] = field(default=None)
    reward: float = field(default=0.0)  # Required by openenv-core 0.1.1
    done: bool = field(default=False)  # Required by openenv-core 0.1.1
31
+
32
@dataclass
class CloudState:
    """Snapshot of episode bookkeeping; all fields are required."""

    # Unique identifier of the current episode.
    episode_id: str
    # Number of steps taken so far in the episode.
    step_count: int
    # Identifier of the active task.
    task_id: str
    # Completion flag for the episode.
    is_completed: bool
    # Reward accumulated over the episode.
    score: float
server/tasks.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, List, Any
2
+
3
# Catalogue of evaluation tasks, keyed by task id. Each entry repeats its id
# and carries the agent-facing description, a difficulty label and the reward
# needed to count the task as solved.
TASKS = {
    "easy": {
        "id": "easy",
        # The buckets are distinguished by their 'env' tag; their regions are
        # AWS-style names, so the prompt must not speak of a 'prod' region.
        "description": "Identify all public S3 buckets tagged with the 'prod' environment (tags.env == 'prod'). Submit their IDs as a comma-separated string.",
        "difficulty": "Easy",
        "reward_threshold": 1.0,
    },
    "medium": {
        "id": "medium",
        "description": "Find the EC2 instance with ID 'i-0abcdef1234567890' in the 'dev' environment. Close port 3389 (RDP) by updating its security group rules to only allow port 22.",
        "difficulty": "Medium",
        "reward_threshold": 1.0,
    },
    "hard": {
        "id": "hard",
        "description": "A rogue IAM role 'iam-role-01' has been performing unauthorized actions. Fetch the 'auth-logs' and identify the IP address that performed 'DeleteStorage'. Submit the IP address.",
        "difficulty": "Hard",
        "reward_threshold": 1.0,
    },
}
23
+
24
def get_task_info(task_id: str) -> Dict[str, Any]:
    """Return the task definition for *task_id*, or an empty dict if unknown."""
    if task_id in TASKS:
        return TASKS[task_id]
    return {}
26
+
27
def list_tasks() -> List[Dict[str, Any]]:
    """Return every task definition as a new list, in catalogue order."""
    return [*TASKS.values()]