Standardize API environment variables, update port to 7860, and bump version to 0.2.0
Browse files- DOCUMENTATION.md +7 -7
- README.md +10 -5
- inference.py +7 -7
- openenv.yaml +1 -1
- pyproject.toml +1 -1
- scripts/baseline_inference.py +1 -1
DOCUMENTATION.md
CHANGED
|
@@ -208,7 +208,7 @@ Every `step()` and `reset()` returns a `CloudObservation`:
|
|
| 208 |
|
| 209 |
## 9. API Reference
|
| 210 |
|
| 211 |
-
Base URL: `http://localhost:
|
| 212 |
|
| 213 |
### `POST /reset`
|
| 214 |
Reset the environment to a specific task.
|
|
@@ -286,7 +286,7 @@ Dashboard UI (the web interface).
|
|
| 286 |
|
| 287 |
## 10. Dashboard UI
|
| 288 |
|
| 289 |
-
The application includes a premium dark-mode cybersecurity dashboard accessible at `http://localhost:
|
| 290 |
|
| 291 |
### Features
|
| 292 |
- **Sidebar Task Selector** — Switch between Easy, Medium, and Hard challenges with one click.
|
|
@@ -314,7 +314,7 @@ pip install -r requirements.txt
|
|
| 314 |
python -m server.app
|
| 315 |
|
| 316 |
# Open in browser
|
| 317 |
-
open http://localhost:
|
| 318 |
```
|
| 319 |
|
| 320 |
### Running the Baseline Agent
|
|
@@ -329,7 +329,7 @@ python scripts/baseline_inference.py
|
|
| 329 |
docker build -t cloud-security-auditor .
|
| 330 |
|
| 331 |
# Run the container
|
| 332 |
-
docker run -p
|
| 333 |
```
|
| 334 |
|
| 335 |
### Hugging Face Spaces Deployment
|
|
@@ -357,14 +357,14 @@ docker run -p 8000:8000 cloud-security-auditor
|
|
| 357 |
|
| 358 |
```yaml
|
| 359 |
name: cloud-security-auditor
|
| 360 |
-
version: "0.
|
| 361 |
description: "A real-world cloud security audit environment for AI agents."
|
| 362 |
hardware:
|
| 363 |
tier: "cpu-small"
|
| 364 |
vCPU: 2
|
| 365 |
RAM: 4Gi
|
| 366 |
-
port:
|
| 367 |
-
entrypoint: "uvicorn server.app:app --host 0.0.0.0 --port
|
| 368 |
tags:
|
| 369 |
- security
|
| 370 |
- cloud
|
|
|
|
| 208 |
|
| 209 |
## 9. API Reference
|
| 210 |
|
| 211 |
+
Base URL: `http://localhost:7860`
|
| 212 |
|
| 213 |
### `POST /reset`
|
| 214 |
Reset the environment to a specific task.
|
|
|
|
| 286 |
|
| 287 |
## 10. Dashboard UI
|
| 288 |
|
| 289 |
+
The application includes a premium dark-mode cybersecurity dashboard accessible at `http://localhost:7860`.
|
| 290 |
|
| 291 |
### Features
|
| 292 |
- **Sidebar Task Selector** — Switch between Easy, Medium, and Hard challenges with one click.
|
|
|
|
| 314 |
python -m server.app
|
| 315 |
|
| 316 |
# Open in browser
|
| 317 |
+
open http://localhost:7860
|
| 318 |
```
|
| 319 |
|
| 320 |
### Running the Baseline Agent
|
|
|
|
| 329 |
docker build -t cloud-security-auditor .
|
| 330 |
|
| 331 |
# Run the container
|
| 332 |
+
docker run -p 7860:7860 cloud-security-auditor
|
| 333 |
```
|
| 334 |
|
| 335 |
### Hugging Face Spaces Deployment
|
|
|
|
| 357 |
|
| 358 |
```yaml
|
| 359 |
name: cloud-security-auditor
|
| 360 |
+
version: "0.2.0"
|
| 361 |
description: "A real-world cloud security audit environment for AI agents."
|
| 362 |
hardware:
|
| 363 |
tier: "cpu-small"
|
| 364 |
vCPU: 2
|
| 365 |
RAM: 4Gi
|
| 366 |
+
port: 7860
|
| 367 |
+
entrypoint: "uvicorn server.app:app --host 0.0.0.0 --port 7860"
|
| 368 |
tags:
|
| 369 |
- security
|
| 370 |
- cloud
|
README.md
CHANGED
|
@@ -9,7 +9,7 @@ pinned: false
|
|
| 9 |
license: apache-2.0
|
| 10 |
---
|
| 11 |
|
| 12 |
-
# 🛡️ CloudSecurityAuditor OpenEnv
|
| 13 |
|
| 14 |
**CloudSecurityAuditor** is a high-fidelity, standardized AI agent environment designed to simulate real-world cloud security audit scenarios. Built upon the **OpenEnv** specification, it provides a safe, reproducible sandbox where autonomous agents can practice identifying, analyzing, and remediating critical security vulnerabilities in a mock cloud infrastructure.
|
| 15 |
|
|
@@ -54,8 +54,8 @@ This environment is specifically engineered for benchmarking LLM-based security
|
|
| 54 |
If you are running this in a **Hugging Face Space**:
|
| 55 |
|
| 56 |
1. **Examine the API**: The environment is hosted as a FastAPI server. Use the `/ui` endpoint for a visual dashboard.
|
| 57 |
-
2. **Inference**:
|
| 58 |
-
3. **Evaluate**: The
|
| 59 |
|
| 60 |
## 🐳 Local Deployment
|
| 61 |
|
|
@@ -63,10 +63,15 @@ If you are running this in a **Hugging Face Space**:
|
|
| 63 |
# Clone and Install
|
| 64 |
pip install -r requirements.txt
|
| 65 |
|
| 66 |
-
# Run Server
|
| 67 |
python -m server.app
|
| 68 |
|
| 69 |
-
# Run Baseline
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
python inference.py
|
| 71 |
```
|
| 72 |
|
|
|
|
| 9 |
license: apache-2.0
|
| 10 |
---
|
| 11 |
|
| 12 |
+
# 🛡️ CloudSecurityAuditor OpenEnv (v0.2.0)
|
| 13 |
|
| 14 |
**CloudSecurityAuditor** is a high-fidelity, standardized AI agent environment designed to simulate real-world cloud security audit scenarios. Built upon the **OpenEnv** specification, it provides a safe, reproducible sandbox where autonomous agents can practice identifying, analyzing, and remediating critical security vulnerabilities in a mock cloud infrastructure.
|
| 15 |
|
|
|
|
| 54 |
If you are running this in a **Hugging Face Space**:
|
| 55 |
|
| 56 |
1. **Examine the API**: The environment is hosted as a FastAPI server. Use the `/ui` endpoint for a visual dashboard.
|
| 57 |
+
2. **Inference (LLM Agent)**: Set `API_BASE_URL` and `API_KEY` (e.g., from LiteLLM proxy) then run `python inference.py`.
|
| 58 |
+
3. **Evaluate**: The AI agent creates standardized logs for automated evaluation.
|
| 59 |
|
| 60 |
## 🐳 Local Deployment
|
| 61 |
|
|
|
|
| 63 |
# Clone and Install
|
| 64 |
pip install -r requirements.txt
|
| 65 |
|
| 66 |
+
# Run Server (Default port 7860)
|
| 67 |
python -m server.app
|
| 68 |
|
| 69 |
+
# Run Baseline (Rule-based)
|
| 70 |
+
python scripts/baseline_inference.py
|
| 71 |
+
|
| 72 |
+
# Run LLM Agent (Using API_BASE_URL and API_KEY)
|
| 73 |
+
export API_BASE_URL="https://api.openai.com/v1"
|
| 74 |
+
export API_KEY="your-key"
|
| 75 |
python inference.py
|
| 76 |
```
|
| 77 |
|
inference.py
CHANGED
|
@@ -5,9 +5,9 @@ Uses an LLM (via OpenAI-compatible client) to autonomously solve all 3 security
|
|
| 5 |
Emits structured [START], [STEP], [END] logs for automated evaluation.
|
| 6 |
|
| 7 |
Required environment variables:
|
| 8 |
-
API_BASE_URL — The API endpoint for the LLM (e.g., https://
|
| 9 |
-
MODEL_NAME — The model identifier (e.g.,
|
| 10 |
-
|
| 11 |
"""
|
| 12 |
|
| 13 |
import os
|
|
@@ -21,9 +21,9 @@ from openai import OpenAI
|
|
| 21 |
# ──────────────────────────────────────────────
|
| 22 |
# Configuration from environment variables
|
| 23 |
# ──────────────────────────────────────────────
|
| 24 |
-
API_BASE_URL = os.
|
| 25 |
-
|
| 26 |
-
|
| 27 |
LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME", "")
|
| 28 |
|
| 29 |
ENV_URL = os.getenv("ENV_URL", "http://localhost:8000")
|
|
@@ -32,7 +32,7 @@ BENCHMARK_NAME = "cloud-security-auditor"
|
|
| 32 |
# Initialize OpenAI-compatible client
|
| 33 |
client = OpenAI(
|
| 34 |
base_url=API_BASE_URL,
|
| 35 |
-
api_key=
|
| 36 |
)
|
| 37 |
|
| 38 |
# ──────────────────────────────────────────────
|
|
|
|
| 5 |
Emits structured [START], [STEP], [END] logs for automated evaluation.
|
| 6 |
|
| 7 |
Required environment variables:
|
| 8 |
+
API_BASE_URL — The API endpoint for the LLM (e.g., https://api.openai.com/v1)
|
| 9 |
+
MODEL_NAME — The model identifier (e.g., gpt-4o-mini)
|
| 10 |
+
API_KEY — Your API key for the LLM proxy
|
| 11 |
"""
|
| 12 |
|
| 13 |
import os
|
|
|
|
| 21 |
# ──────────────────────────────────────────────
|
| 22 |
# Configuration from environment variables
|
| 23 |
# ──────────────────────────────────────────────
|
| 24 |
+
API_BASE_URL = os.environ.get("API_BASE_URL", "https://api.openai.com/v1")
|
| 25 |
+
API_KEY = os.environ.get("API_KEY", "")
|
| 26 |
+
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4o-mini")
|
| 27 |
LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME", "")
|
| 28 |
|
| 29 |
ENV_URL = os.getenv("ENV_URL", "http://localhost:8000")
|
|
|
|
| 32 |
# Initialize OpenAI-compatible client
|
| 33 |
client = OpenAI(
|
| 34 |
base_url=API_BASE_URL,
|
| 35 |
+
api_key=API_KEY,
|
| 36 |
)
|
| 37 |
|
| 38 |
# ──────────────────────────────────────────────
|
openenv.yaml
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
name: cloud-security-auditor
|
| 2 |
-
version: "0.
|
| 3 |
description: "A real-world cloud security audit environment for AI agents."
|
| 4 |
hardware:
|
| 5 |
tier: "cpu-small"
|
|
|
|
| 1 |
name: cloud-security-auditor
|
| 2 |
+
version: "0.2.0"
|
| 3 |
description: "A real-world cloud security audit environment for AI agents."
|
| 4 |
hardware:
|
| 5 |
tier: "cpu-small"
|
pyproject.toml
CHANGED
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
| 4 |
|
| 5 |
[project]
|
| 6 |
name = "cloud-security-auditor"
|
| 7 |
-
version = "0.
|
| 8 |
description = "A real-world cloud security audit environment for AI agents."
|
| 9 |
readme = "README.md"
|
| 10 |
requires-python = ">=3.10"
|
|
|
|
| 4 |
|
| 5 |
[project]
|
| 6 |
name = "cloud-security-auditor"
|
| 7 |
+
version = "0.2.0"
|
| 8 |
description = "A real-world cloud security audit environment for AI agents."
|
| 9 |
readme = "README.md"
|
| 10 |
requires-python = ">=3.10"
|
scripts/baseline_inference.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import requests
|
| 2 |
import json
|
| 3 |
|
| 4 |
-
BASE_URL = "http://localhost:
|
| 5 |
|
| 6 |
def run_baseline_audit(task_id="easy"):
|
| 7 |
print(f"--- Running Baseline for Task: {task_id} ---")
|
|
|
|
| 1 |
import requests
|
| 2 |
import json
|
| 3 |
|
| 4 |
+
BASE_URL = "http://localhost:7860"
|
| 5 |
|
| 6 |
def run_baseline_audit(task_id="easy"):
|
| 7 |
print(f"--- Running Baseline for Task: {task_id} ---")
|