Spaces:

iitian
/

open_env

Sleeping

App Files Files Community

iitian committed 16 days ago

Commit

f1a1961

0 Parent(s):

feat: Add CloudSecurityAuditor OpenEnv environment

Browse files

Files changed (10) hide show

.gitignore +6 -0
Dockerfile +17 -0
README.md +64 -0
openenv.yaml +24 -0
requirements.txt +5 -0
scripts/baseline_inference.py +67 -0
server/app.py +15 -0
server/environment.py +136 -0
server/models.py +38 -0
server/tasks.py +28 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,6 @@

+__pycache__/
+*.pyc
+.ipynb_checkpoints/
+venv/
+.env
+.DS_Store

Dockerfile ADDED Viewed

	@@ -0,0 +1,17 @@

+FROM python:3.10-slim
+WORKDIR /app
+# Copy requirement files first
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy source code
+COPY server/ ./server/
+COPY openenv.yaml .
+# Expose the API port
+EXPOSE 8000
+# Start server
+CMD ["uvicorn", "server.app:app", "--host", "0.0.0.0", "--port", "8000"]

README.md ADDED Viewed

	@@ -0,0 +1,64 @@

+# CloudSecurityAuditor OpenEnv
+A standardized AI agent environment for simulating real-world cloud security audits. Built using the **OpenEnv** specification, it allows agents to interact with a mock cloud infrastructure to identify and remediate vulnerabilities.
+## 🌟 Key Features
+- **Typed Models**: Full Pydantic support for actions and observations.
+- **Three Task Tiers**: Includes Easy (Information Gathering), Medium (Remediation), and Hard (Forensic Analysis).
+- **Gymnasium-Style API**: Implements `step()`, `reset()`, and `state()`. Unlike Gymnasium's tuple return, `step()` returns a single observation that carries `reward` and `done`.
+- **Reward-Driven**: Scalar rewards from 0.0 to 1.0 based on task completion.
+## 🛠 Action Space
+The agent can perform the following actions via the `step()` method:
+- **`list`**: Lists resources of a specific type (`s3`, `ec2`).
+- **`describe`**: Fetches detailed configuration for a specific resource ID.
+- **`modify`**: Updates resource configurations (e.g., security groups).
+- **`logs`**: Retrieves logs for a specific resource or service.
+- **`submit`**: Submits the final answer for the evaluation tasks.
+## 📊 Observation Space
+Each step returns a `CloudObservation` containing:
+- `resources`: A list of discovered resource records.
+- `details`: Metadata for a specific resource.
+- `logs`: Relevant log entries.
+- `status`: Human-readable status message.
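+- `info`: Feedback message from the environment (e.g. the reset confirmation or a grading result).
+- `reward`: Reward earned by the step (`0.0` or `1.0`).
+- `done`: Whether the episode has ended (task solved or the 20-step limit reached).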
+## 📋 Tasks
+1. **Easy (S3 Public Audit)**: Identify all public S3 buckets tagged `env: prod`.
+2. **Medium (EC2 Security Patch)**: Find an EC2 instance with RDP port open to the internet and close it.
+3. **Hard (IAM Log Forensic)**: Trace unauthorized actions in `auth-logs` to identify a rogue IP address.
+## 🚀 Setup & Installation
+### Local Installation
+```bash
+pip install -r requirements.txt
+```
+### Running the Server
+```bash
+python -m server.app
+```
+The server will start on `http://localhost:8000`.
+### Running the Baseline Agent
+```bash
+python scripts/baseline_inference.py
+```
+## 🐳 Docker Deployment
+To build and run the containerized environment:
+```bash
+docker build -t cloud-security-auditor-env .
+docker run -p 8000:8000 cloud-security-auditor-env
+```
+## 🤗 Hugging Face Spaces
+This environment is designed to be deployed as an **OpenEnv Space**.
+1. Create a new Space on Hugging Face.
+2. Select **Docker** as the SDK.
+3. Upload the repository contents (including `openenv.yaml` and `Dockerfile`).
+4. Make sure the Space starts the server with the same `uvicorn` command that `openenv.yaml` declares under `entrypoint` (the `Dockerfile` `CMD` already does).

openenv.yaml ADDED Viewed

	@@ -0,0 +1,24 @@

+name: cloud-security-auditor
+version: "0.1.0"
+description: "A real-world cloud security audit environment for AI agents."
+hardware:
+  tier: "cpu-small"
+  vCPU: 2
+  RAM: 4Gi
+port: 8000
+entrypoint: "uvicorn server.app:app --host 0.0.0.0 --port 8000"
+tags:
+  - security
+  - cloud
+  - task-based
+evaluation:
+  tasks:
+    - id: "easy"
+      name: "S3 Public Audit"
+      difficulty: "easy"
+    - id: "medium"
+      name: "EC2 Security Patch"
+      difficulty: "medium"
+    - id: "hard"
+      name: "IAM Log Forensic"
+      difficulty: "hard"

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+openenv-core>=0.1.1
+fastapi
+uvicorn
+pydantic
+python-multipart

scripts/baseline_inference.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import requests
+import json
+BASE_URL = "http://localhost:8000"
+def run_baseline_audit(task_id="easy"):
+    print(f"--- Running Baseline for Task: {task_id} ---")
+    # 1. Reset environment
+    response = requests.post(f"{BASE_URL}/reset", json={"task_id": task_id})
+    if response.status_code != 200:
+        print(f"Failed to reset: {response.text}")
+        return
+    obs_data = response.json()
+    obs = obs_data.get("observation", {})
+    print(f"Observation Info: {obs.get('info')}")
+    # 2. List S3 buckets
+    # Note: wrapping in "action" key to avoid collision with 'action' field in CloudAction
+    action_payload = {
+        "action": {
+            "action": "list",
+            "resource_type": "s3"
+        }
+    }
+    response = requests.post(f"{BASE_URL}/step", json=action_payload)
+    if response.status_code != 200:
+        print(f"Failed on step: {response.text}")
+        return
+    step_result = response.json()
+    obs = step_result.get("observation", {})
+    resources = obs.get("resources", [])
+    print(f"Discovered {len(resources)} S3 buckets.")
+    # 3. Logic to identify public prod buckets
+    public_prod_buckets = []
+    for r in resources:
+        if r.get("public") and r.get("tags", {}).get("env") == "prod":
+            public_prod_buckets.append(r["id"])
+    print(f"Identified Public Prod Buckets: {public_prod_buckets}")
+    # 4. Submit answer
+    submit_payload = {
+        "action": {
+            "action": "submit",
+            "answer": ",".join(public_prod_buckets)
+        }
+    }
+    response = requests.post(f"{BASE_URL}/step", json=submit_payload)
+    step_result = response.json()
+    obs = step_result.get("observation", {})
+    reward = step_result.get("reward", 0.0)
+    done = step_result.get("done", False)
+    print(f"Final Reward: {reward}")
+    print(f"Done: {done}")
+    print(f"Info: {obs.get('info')}")
+if __name__ == "__main__":
+    try:
+        run_baseline_audit("easy")
+    except Exception as e:
+        print(f"Error: {e}")

server/app.py ADDED Viewed

	@@ -0,0 +1,15 @@

+from openenv_core.env_server import create_fastapi_app
+from .models import CloudAction, CloudObservation
+from .environment import CloudAuditEnv
+# Initialize the environment
+# NOTE(review): this single module-level instance is what gets handed to the
+# app below, so its episode state (task, step count, score) is presumably
+# shared by every client of the server — confirm how create_fastapi_app uses it.
+env = CloudAuditEnv()
+# Create the FastAPI app
+# Note: create_fastapi_app expects the environment instance,
+# and the Action/Observation models for typing.
+app = create_fastapi_app(env, CloudAction, CloudObservation)
+if __name__ == "__main__":
+    # Lets `python -m server.app` start the server directly, on the same
+    # host and port as the Dockerfile CMD.
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)

server/environment.py ADDED Viewed

	@@ -0,0 +1,136 @@

+import uuid
+import datetime
+from typing import Optional, Tuple, Dict, Any, List
+from .models import CloudAction, CloudObservation, CloudState, CloudActionType
+class CloudAuditEnv:
+    def __init__(self):
+        self.task_id = "easy"
+        self._initialize_state()
+    def _initialize_state(self):
+        self.episode_id = str(uuid.uuid4())
+        self.step_count = 0
+        self.is_completed = False
+        self.score = 0.0
+        # Mock Infrastructure
+        self.resources = {
+            "s3": [
+                {"id": "prod-data-001", "region": "us-east-1", "public": True, "tags": {"env": "prod"}},
+                {"id": "prod-logs-002", "region": "us-east-1", "public": False, "tags": {"env": "prod"}},
+                {"id": "dev-test-01", "region": "us-west-2", "public": True, "tags": {"env": "dev"}},
+            ],
+            "ec2": [
+                {"id": "i-0abcdef1234567890", "type": "t2.micro", "state": "running", "tags": {"env": "dev"},
+                 "security_groups": [{"id": "sg-01", "rules": [{"port": 22, "cidr": "0.0.0.0/0"}, {"port": 3389, "cidr": "0.0.0.0/0"}]}]},
+                {"id": "i-0987654321fedcba0", "type": "m5.large", "state": "running", "tags": {"env": "prod"},
+                 "security_groups": [{"id": "sg-02", "rules": [{"port": 443, "cidr": "0.0.0.0/0"}]}]},
+            ],
+            "logs": {
+                "auth-logs": [
+                    {"timestamp": "2026-04-05T10:00:00Z", "user": "admin", "action": "Login", "ip": "1.1.1.1"},
+                    {"timestamp": "2026-04-05T10:15:00Z", "user": "iam-role-01", "action": "DeleteStorage", "ip": "192.168.1.50"},
+                    {"timestamp": "2026-04-05T10:30:00Z", "user": "user-02", "action": "ListBuckets", "ip": "2.2.2.2"},
+                ]
+            }
+        }
+    def reset(self, task_id: str = "easy") -> CloudObservation:
+        """Required by openenv-core 0.1.1: takes task_id, returns JUST the observation."""
+        self.task_id = task_id
+        self._initialize_state()
+        return CloudObservation(info=f"Environment reset. Task: {self.task_id}", reward=0.0, done=False)
+    def step(self, action: CloudAction) -> CloudObservation:
+        """Required by openenv-core 0.1.1: takes action, returns JUST the observation with reward/done fields."""
+        self.step_count += 1
+        reward = 0.0
+        terminated = False
+        truncated = self.step_count >= 20  # Limit steps
+        obs = CloudObservation()
+        if action.action == CloudActionType.LIST:
+            r_type = action.resource_type
+            if r_type in self.resources:
+                obs.resources = self.resources[r_type]
+                obs.status = f"Listed {len(obs.resources)} {r_type} resources."
+            else:
+                obs.status = f"Unknown resource type: {r_type}"
+        elif action.action == CloudActionType.DESCRIBE:
+            res_id = action.resource_id
+            found = False
+            for r_type in ["s3", "ec2"]:
+                for r in self.resources[r_type]:
+                    if r["id"] == res_id:
+                        obs.details = r
+                        obs.status = f"Described resource {res_id}"
+                        found = True
+                        break
+            if not found:
+                obs.status = f"Resource not found: {res_id}"
+        elif action.action == CloudActionType.MODIFY:
+            res_id = action.resource_id
+            patch = action.patch
+            # Simple EC2 security group patching for Medium task
+            if self.task_id == "medium" and res_id == "i-0abcdef1234567890":
+                for sg in self.resources["ec2"][0]["security_groups"]:
+                    if patch and "rules" in patch:
+                        sg["rules"] = patch["rules"]
+                obs.status = f"Updated security groups for {res_id}"
+                # Check for reward
+                rules = self.resources["ec2"][0]["security_groups"][0]["rules"]
+                has_rdp = any(r["port"] == 3389 and r["cidr"] == "0.0.0.0/0" for r in rules)
+                if not has_rdp:
+                    reward = 1.0
+                    terminated = True
+            else:
+                obs.status = "Action not permitted or invalid resource."
+        elif action.action == CloudActionType.LOGS:
+            log_name = action.resource_id
+            if log_name in self.resources["logs"]:
+                obs.logs = self.resources["logs"][log_name]
+                obs.status = f"Fetched logs for {log_name}"
+            else:
+                obs.status = f"Logs not found: {log_name}"
+        elif action.action == CloudActionType.SUBMIT:
+            # For Easy and Hard tasks
+            if self.task_id == "easy":
+                # Expecting agent to list public S3 buckets in prod
+                if action.answer:
+                    answers = [a.strip() for a in action.answer.split(",")]
+                    expected = ["prod-data-001"]
+                    if set(answers) == set(expected):
+                        reward = 1.0
+                        terminated = True
+                        obs.info = "Correct! Task completed."
+                    else:
+                        obs.info = f"Incorrect list of buckets. Got: {answers}"
+            elif self.task_id == "hard":
+                # Expecting rogue IP
+                if action.answer == "192.168.1.50":
+                    reward = 1.0
+                    terminated = True
+                    obs.info = "Correct Rogue IP identified!"
+                else:
+                    obs.info = f"Wrong IP. Got: {action.answer}"
+        self.score += reward
+        obs.reward = reward
+        obs.done = terminated or truncated
+        return obs
+    def state(self) -> CloudState:
+        return CloudState(
+            episode_id=self.episode_id,
+            step_count=self.step_count,
+            task_id=self.task_id,
+            is_completed=self.is_completed,
+            score=self.score
+        )

server/models.py ADDED Viewed

	@@ -0,0 +1,38 @@

+from enum import Enum
+from typing import List, Optional, Dict, Any
+from dataclasses import dataclass, field
+class CloudActionType(str, Enum):
+    """Closed set of verbs an agent may send in ``CloudAction.action``.
+
+    The ``str`` mixin makes each member compare equal to its plain string value.
+    """
+    LIST = "list"
+    DESCRIBE = "describe"
+    MODIFY = "modify"
+    LOGS = "logs"
+    SUBMIT = "submit"
+@dataclass
+class CloudAction:
+    """One agent request; only ``action`` is required, the rest depend on the verb."""
+    # Which verb to perform.
+    action: CloudActionType
+    # Kind of resource to enumerate; read by the "list" action.
+    resource_type: Optional[str] = None
+    # Target identifier; read by "describe" and "modify", and used as the log name by "logs".
+    resource_id: Optional[str] = None
+    # Requested changes; the environment's "modify" handler looks for a "rules" key.
+    patch: Optional[Dict[str, Any]] = None
+    # NOTE(review): start_time/end_time are not read by server/environment.py in this
+    # commit — presumably reserved for log time-range filtering; confirm.
+    start_time: Optional[str] = None
+    end_time: Optional[str] = None
+    # Final answer text; read by the "submit" action.
+    answer: Optional[str] = None
+@dataclass
+class CloudObservation:
+    """Result of a reset or step; fields not relevant to the action stay at their defaults."""
+    # Records returned by a "list" action.
+    resources: Optional[List[Dict[str, Any]]] = None
+    # Single record returned by a "describe" action.
+    details: Optional[Dict[str, Any]] = None
+    # Human-readable outcome of the action.
+    status: Optional[str] = None
+    # Entries returned by a "logs" action.
+    logs: Optional[List[Dict[str, Any]]] = None
+    # Feedback text, e.g. the reset confirmation or a grading message.
+    info: Optional[str] = None
+    reward: float = 0.0          # Required by openenv-core 0.1.1
+    done: bool = False           # Required by openenv-core 0.1.1
+@dataclass
+class CloudState:
+    """Episode bookkeeping snapshot returned by ``CloudAuditEnv.state()``."""
+    # Random UUID string generated when the episode state is initialized.
+    episode_id: str
+    # Number of step() calls counted so far in this episode.
+    step_count: int
+    # Identifier of the active task ("easy", "medium" or "hard" in this commit).
+    task_id: str
+    # Copied from the environment's is_completed attribute.
+    is_completed: bool
+    # Sum of the rewards handed out so far in this episode.
+    score: float

server/tasks.py ADDED Viewed

	@@ -0,0 +1,28 @@

+from typing import Dict, List, Any
+TASKS = {
+    "easy": {
+        "id": "easy",
+        "description": "Identify all public S3 buckets in the 'prod' region. Submit their IDs as a comma-separated string.",
+        "difficulty": "Easy",
+        "reward_threshold": 1.0,
+    },
+    "medium": {
+        "id": "medium",
+        "description": "Find the EC2 instance with ID 'i-0abcdef1234567890' in the 'dev' environment. Close port 3389 (RDP) by updating its security group rules to only allow port 22.",
+        "difficulty": "Medium",
+        "reward_threshold": 1.0,
+    },
+    "hard": {
+        "id": "hard",
+        "description": "A rogue IAM role 'iam-role-01' has been performing unauthorized actions. Fetch the 'auth-logs' and identify the IP address that performed 'DeleteStorage'. Submit the IP address.",
+        "difficulty": "Hard",
+        "reward_threshold": 1.0,
+    }
+}
+def get_task_info(task_id: str) -> Dict[str, Any]:
+    return TASKS.get(task_id, {})
+def list_tasks() -> List[Dict[str, Any]]:
+    return list(TASKS.values())