Spaces:

sh4shv4t
/

statestrike-env

Sleeping

App Files Files Community

sh4shv4t committed on Apr 7

Commit

ca72cb2

0 Parent(s):

Initial env deployment

Browse files

Files changed (14) hide show

Dockerfile +7 -0
README.md +8 -0
requirements.txt +14 -0
statestrike_env/README.md +8 -0
statestrike_env/__init__.py +139 -0
statestrike_env/__pycache__/__init__.cpython-311.pyc +0 -0
statestrike_env/__pycache__/constants.cpython-313.pyc +0 -0
statestrike_env/__pycache__/models.cpython-311.pyc +0 -0
statestrike_env/__pycache__/server.cpython-313.pyc +0 -0
statestrike_env/constants.py +56 -0
statestrike_env/grader.py +208 -0
statestrike_env/models.py +98 -0
statestrike_env/server.py +400 -0
statestrike_env/session.py +122 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,7 @@

+# Image for the StateStrike environment server (served on port 7860).
+FROM python:3.11-slim
+WORKDIR /app
+COPY . .
+# statestrike_env/__init__.py imports websockets.sync.client, so `websockets` must be
+# installed or `python -m statestrike_env.server` fails while importing the package.
+# uvicorn[standard] additionally provides the WebSocket protocol support used by /ws.
+RUN pip install --no-cache-dir openenv-core fastapi "uvicorn[standard]" httpx python-dotenv websockets
+ENV HONEYPOT_URL="https://sh4shv4t-statestrike-honeypot.hf.space"
+# server.py takes its listen port from STATESTRIKE_ENV_PORT (default 8001); pin it to
+# the exposed port so the process actually listens where the platform expects it.
+ENV STATESTRIKE_ENV_PORT=7860
+EXPOSE 7860
+CMD ["python", "-m", "statestrike_env.server", "--port", "7860"]

README.md ADDED Viewed

	@@ -0,0 +1,8 @@

+---
+title: StateStrike Environment
+emoji: 🎯
+colorFrom: red
+colorTo: purple
+sdk: docker
+pinned: true
+---

requirements.txt ADDED Viewed

	@@ -0,0 +1,14 @@

+openenv-core==0.1.0
+fastapi==0.111.0
+uvicorn[standard]==0.30.1
+sqlalchemy==2.0.30
+httpx==0.27.0
+pydantic==2.7.1
+streamlit==1.35.0
+plotly==5.22.0
+python-dotenv==1.0.1
+pytest==8.2.0
+pytest-asyncio==0.23.7
+rich==13.7.1
+websockets==12.0
+portalocker==2.8.2

statestrike_env/README.md ADDED Viewed

	@@ -0,0 +1,8 @@

+---
+title: StateStrike Environment
+emoji: 🎯
+colorFrom: red
+colorTo: purple
+sdk: docker
+pinned: true
+---

statestrike_env/__init__.py ADDED Viewed

	@@ -0,0 +1,139 @@

+from __future__ import annotations
+"""StateStrike OpenEnv-compatible client exports."""
+import json
+from contextlib import AbstractContextManager
+from typing import Any
+from websockets.sync.client import ClientConnection, connect
+from statestrike_env.models import StateStrikeAction, StateStrikeObservation, StateStrikeState
+class _SyncStateStrikeClient(AbstractContextManager["_SyncStateStrikeClient"]):
+    """Synchronous WebSocket client wrapper for reset/step/state calls."""
+    def __init__(self, base_url: str) -> None:
+        """Initialize client.
+        Args:
+            base_url: WebSocket URL including `/ws` path.
+        """
+        normalized = base_url.rstrip("/")
+        self.base_url = normalized if normalized.endswith("/ws") else f"{normalized}/ws"
+        self._conn: ClientConnection | None = None
+    def __enter__(self) -> "_SyncStateStrikeClient":
+        """Open WebSocket connection for environment operations.
+        Returns:
+            Connected client instance.
+        """
+        self._conn = connect(self.base_url)
+        return self
+    def __exit__(self, exc_type: Any, exc: Any, tb: Any) -> None:
+        """Close WebSocket connection.
+        Args:
+            exc_type: Exception type if raised in context block.
+            exc: Exception value if raised in context block.
+            tb: Traceback object if raised in context block.
+        """
+        if self._conn is not None:
+            self._conn.close()
+            self._conn = None
+    def reset(self) -> StateStrikeObservation:
+        """Request environment reset.
+        Returns:
+            Initial observation.
+        Raises:
+            RuntimeError: If the server response is malformed or unsuccessful.
+        """
+        frame = self._request({"method": "reset"})
+        return StateStrikeObservation.model_validate(frame["observation"])
+    def step(self, action: StateStrikeAction) -> StateStrikeObservation:
+        """Execute one environment step.
+        Args:
+            action: Action payload.
+        Returns:
+            Updated observation.
+        Raises:
+            RuntimeError: If the server response is malformed or unsuccessful.
+        """
+        frame = self._request({"method": "step", "action": action.model_dump()})
+        return StateStrikeObservation.model_validate(frame["observation"])
+    def state(self) -> StateStrikeState:
+        """Retrieve current environment state.
+        Returns:
+            Current state model.
+        Raises:
+            RuntimeError: If the server response is malformed or unsuccessful.
+        """
+        frame = self._request({"method": "state"})
+        return StateStrikeState.model_validate(frame["state"])
+    def _request(self, payload: dict[str, Any]) -> dict[str, Any]:
+        """Send request frame and parse server response.
+        Args:
+            payload: JSON-serializable request payload.
+        Returns:
+            Parsed response object.
+        Raises:
+            RuntimeError: If connection is closed or server reports failure.
+        """
+        if self._conn is None:
+            raise RuntimeError("WebSocket connection is not open")
+        self._conn.send(json.dumps(payload))
+        raw = self._conn.recv()
+        frame = json.loads(raw)
+        if not frame.get("ok"):
+            raise RuntimeError(frame.get("error", "Unknown server error"))
+        return frame
+class StateStrikeEnv:
+    """Environment client namespace matching OpenEnv SDK usage patterns."""
+    def __init__(self, base_url: str = "ws://localhost:8001/ws") -> None:
+        """Store base URL for later sync client creation.
+        Args:
+            base_url: Environment WebSocket endpoint.
+        """
+        self.base_url = base_url
+    def sync(self) -> _SyncStateStrikeClient:
+        """Create synchronous context-managed client.
+        Returns:
+            A synchronous environment client implementing reset/step/state.
+        """
+        return _SyncStateStrikeClient(self.base_url)
+__all__ = ["StateStrikeEnv", "StateStrikeAction", "StateStrikeObservation", "StateStrikeState"]

statestrike_env/__pycache__/__init__.cpython-311.pyc ADDED Viewed

Binary file (6.4 kB). View file

statestrike_env/__pycache__/constants.cpython-313.pyc ADDED Viewed

Binary file (1.85 kB). View file

statestrike_env/__pycache__/models.cpython-311.pyc ADDED Viewed

Binary file (4.42 kB). View file

statestrike_env/__pycache__/server.cpython-313.pyc ADDED Viewed

Binary file (24.3 kB). View file

statestrike_env/constants.py ADDED Viewed

	@@ -0,0 +1,56 @@

+from __future__ import annotations
+"""Centralized constants for StateStrike environment and reward grading.
+Theory:
+    Consolidating reward and episode hyperparameters avoids hidden magic numbers,
+    supports reproducibility, and aligns with RL experiment hygiene guidance from
+    Sutton & Barto (2018).
+"""
+from dataclasses import dataclass
+# Reward weights (see RewardConstants for the per-term meaning).
+# ALPHA scales the logarithmic latency term.
+ALPHA = 1.0
+# BETA is the state-chain completion bonus.
+BETA = 10.0
+# GAMMA is the one-time exploitation bounty (the grader pays 0.8 * GAMMA for ReDoS).
+GAMMA = 500.0
+# DELTA is the magnitude of the penalty for fast HTTP 400 responses.
+DELTA = 1.0
+# EMA window for the latency baseline; the grader smooths with 2 / (window + 1).
+BASELINE_WINDOW = 10
+# The server marks an episode done once step_count reaches this value.
+EPISODE_LENGTH = 200
+# Latency thresholds in milliseconds used by the exploitation bounties.
+REDOS_LATENCY_THRESHOLD = 1500.0
+DB_TIMEOUT_THRESHOLD = 3000.0
+# Chain bonus gating: minimum order count and minimum steps between awards.
+CHAIN_REQUIRED_ORDERS = 20
+CHAIN_COOLDOWN_STEPS = 10
+# Imported by session.py; presumably caps the stored action history — confirm there.
+MAX_ACTION_HISTORY = 20
+# Timeout (seconds) given to the httpx client for each honeypot request.
+ACTION_TIMEOUT_SECONDS = 8.0
+# Baseline used on reset when the health-check latency is not positive.
+DEFAULT_BASELINE_LATENCY_MS = 50.0
+# The server ends the episode when cumulative reward falls below this value.
+EARLY_TERMINATION_REWARD = -200.0
+@dataclass(frozen=True)
+class RewardConstants:
+    """Typed, immutable reward constants passed into the reward grader.
+    Every field defaults to the module-level constant of the same name, so
+    ``RewardConstants()`` reproduces the defaults and individual values can be
+    overridden per instance.
+    Attributes:
+        ALPHA: Latency reward weight.
+        BETA: State-chain completion bonus.
+        GAMMA: Exploitation bounty for severe degradation/failure.
+        DELTA: Penalty magnitude for low-value fuzzing requests.
+        REDOS_LATENCY_THRESHOLD: Latency threshold (ms) used to infer ReDoS impact.
+        DB_TIMEOUT_THRESHOLD: Latency threshold (ms) used for DB timeout exploitation.
+        CHAIN_REQUIRED_ORDERS: Minimum order count before GET /orders chain bonus.
+        CHAIN_COOLDOWN_STEPS: Minimum steps between chain bonus awards.
+        EARLY_TERMINATION_REWARD: Episode early-stop reward floor.
+        BASELINE_WINDOW: EMA window used for baseline latency updates.
+    """
+    ALPHA: float = ALPHA
+    BETA: float = BETA
+    GAMMA: float = GAMMA
+    DELTA: float = DELTA
+    REDOS_LATENCY_THRESHOLD: float = REDOS_LATENCY_THRESHOLD
+    DB_TIMEOUT_THRESHOLD: float = DB_TIMEOUT_THRESHOLD
+    CHAIN_REQUIRED_ORDERS: int = CHAIN_REQUIRED_ORDERS
+    CHAIN_COOLDOWN_STEPS: int = CHAIN_COOLDOWN_STEPS
+    EARLY_TERMINATION_REWARD: float = EARLY_TERMINATION_REWARD
+    BASELINE_WINDOW: int = BASELINE_WINDOW

statestrike_env/grader.py ADDED Viewed

	@@ -0,0 +1,208 @@

+from __future__ import annotations
+"""Reward grading logic for StateStrike.
+Theory:
+    The reward function follows standard MDP shaping principles from Sutton &
+    Barto (2018): combine dense shaping signals (latency ratio), sparse goal
+    rewards (exploit bounty), and penalties (invalid spam suppression). It also
+    borrows stateful-sequence ideas from RESTler (Atlidakis et al., ICSE 2019)
+    while rewarding infrastructure effects (e.g., ReDoS latency spikes) inspired
+    by Davis et al. (USENIX Security 2018).
+"""
+import logging
+import math
+from typing import TYPE_CHECKING
+if TYPE_CHECKING:
+    from statestrike_env.constants import RewardConstants
+    from statestrike_env.models import StateStrikeObservation
+    from statestrike_env.session import StateStrikeSession
+logger = logging.getLogger(__name__)
+def compute_reward(
+    obs: "StateStrikeObservation",
+    session: "StateStrikeSession",
+    constants: "RewardConstants",
+) -> tuple[float, dict[str, float]]:
+    """
+    Compute R_t = α·log(L_t/L_base) + β·S_t + γ·E_t − δ·P_t
+    Theory (Sutton & Barto, 2018, Ch. 3 — Finite MDPs):
+    The reward signal must be designed so the ONLY way to maximize cumulative
+    reward is to achieve the TRUE objective. Each term is chosen to prevent a
+    specific reward-hacking strategy:
+    TERM 1 — α·log(L_t/L_base): Logarithmic latency reward.
+      Why log? Linear reward incentivizes the agent to find ONE massive spike
+      and repeat it. Logarithmic reward gives diminishing returns per repeated
+      exploitation, pushing the agent to discover NEW vulnerabilities.
+      Why ratio? Prevents baseline-anchoring attacks where agent engineers a
+      low baseline then makes normal requests look like spikes.
+      Anti-hack: baseline ONLY updates from successful (latency>0) steps.
+    TERM 2 — β·S_t: State-chain bonus.
+      Fires at most once per CHAIN_COOLDOWN_STEPS steps, and only if
+      order_count has increased since the last award. This prevents the
+      POST→GET farming loop that would yield +5 reward/step for free.
+      Anti-hack: last_chain_bonus_step and post_count_at_last_chain guards.
+    TERM 3 — γ·E_t: Exploitation bounty.
+      Fires EXACTLY ONCE per vulnerability type per episode (one-time flag).
+      Without this, an agent discovering db_degradation would spam GET /orders
+      for +500/step indefinitely. The one-time award correctly signals
+      "you found it" without incentivizing repeated triggering.
+      Anti-hack: redos_bounty_awarded and db_degradation_bounty_awarded flags.
+    TERM 4 — δ·P_t: Fuzzing penalty.
+      Applied only to genuinely fast 400s (latency < 100ms), not to slow 400s
+      (which may indicate actual CPU burn from ReDoS parsing).
+      Threshold tightened from 200ms to 100ms to avoid penalizing legitimate
+      slow-failing payloads.
+      Anti-hack: latency threshold ensures ReDoS probes are not penalized.
+    Reference:
+      - Sutton & Barto (2018): reward shaping and sparse reward design
+      - Atlidakis et al. (ICSE 2019): stateful API exploration objectives
+      - Davis et al. (USENIX 2018): ReDoS computational complexity
+    Args:
+        obs: The observation from the current step.
+        session: The mutable session state (modified in-place for flags).
+        constants: Reward weight constants from constants.py.
+    Returns:
+        Tuple of (scalar_reward, breakdown_dict) where breakdown_dict
+        contains each term's contribution for telemetry and dashboard display.
+    """
+    reward = 0.0
+    breakdown: dict[str, float] = {
+        "latency_reward": 0.0,
+        "chain_bonus": 0.0,
+        "exploit_bounty": 0.0,
+        "fuzz_penalty": 0.0,
+        "total": 0.0,
+    }
+    # Guard: connection failure -> neutral observation, no reward signal.
+    if obs.http_status == 0 or obs.latency_ms == 0.0:
+        breakdown["error"] = 1.0
+        logger.debug("Step %d: connection failure, returning zero reward", obs.step)
+        return 0.0, breakdown
+    # Update rolling baseline only from successful steps.
+    _update_baseline(session, obs.latency_ms, constants.BASELINE_WINDOW)
+    # TERM 1: Logarithmic latency reward.
+    if session.baseline_latency > 0:
+        latency_ratio = obs.latency_ms / session.baseline_latency
+        latency_ratio = max(0.01, min(latency_ratio, 100.0))
+        latency_reward = constants.ALPHA * math.log(latency_ratio)
+        reward += latency_reward
+        breakdown["latency_reward"] = round(latency_reward, 4)
+        logger.debug(
+            "Step %d: latency=%.1fms baseline=%.1fms ratio=%.2f reward=%.3f",
+            obs.step,
+            obs.latency_ms,
+            session.baseline_latency,
+            latency_ratio,
+            latency_reward,
+        )
+    # TERM 2: State-chain bonus (anti-farming guards).
+    chain_bonus = 0.0
+    if _should_award_chain_bonus(obs, session, constants):
+        chain_bonus = constants.BETA
+        session.last_chain_bonus_step = obs.step
+        session.post_count_at_last_chain = session.order_count
+        logger.info(
+            "Step %d: Chain bonus awarded (+%.1f). order_count=%d",
+            obs.step,
+            chain_bonus,
+            session.order_count,
+        )
+    reward += chain_bonus
+    breakdown["chain_bonus"] = chain_bonus
+    # TERM 3: Exploitation bounties (one-time per episode).
+    exploit_bounty = 0.0
+    if (
+        not session.db_degradation_bounty_awarded
+        and (obs.http_status >= 500 or obs.latency_ms > constants.DB_TIMEOUT_THRESHOLD)
+        and obs.action_taken.action_type.value == "get_orders"
+    ):
+        exploit_bounty += constants.GAMMA
+        session.db_degradation_bounty_awarded = True
+        session.triggered_vulns.add("db_degradation")
+        logger.info(
+            "Step %d: DB_DEGRADATION bounty awarded (+%.1f). latency=%.1fms",
+            obs.step,
+            constants.GAMMA,
+            obs.latency_ms,
+        )
+    if (
+        not session.redos_bounty_awarded
+        and obs.latency_ms > constants.REDOS_LATENCY_THRESHOLD
+        and obs.http_status == 400
+        and obs.action_taken.action_type.value == "post_user"
+    ):
+        redos_bounty = constants.GAMMA * 0.8
+        exploit_bounty += redos_bounty
+        session.redos_bounty_awarded = True
+        session.triggered_vulns.add("redos")
+        logger.info(
+            "Step %d: REDOS bounty awarded (+%.1f). latency=%.1fms",
+            obs.step,
+            redos_bounty,
+            obs.latency_ms,
+        )
+    reward += exploit_bounty
+    breakdown["exploit_bounty"] = round(exploit_bounty, 4)
+    # TERM 4: Fuzzing penalty (only genuine fast-fail syntax errors).
+    fuzz_penalty = 0.0
+    if obs.http_status == 400 and obs.latency_ms < 100.0:
+        fuzz_penalty = -constants.DELTA
+        logger.debug("Step %d: Fuzz penalty applied (fast 400, %.1fms)", obs.step, obs.latency_ms)
+    reward += fuzz_penalty
+    breakdown["fuzz_penalty"] = round(fuzz_penalty, 4)
+    breakdown["total"] = round(reward, 4)
+    return reward, breakdown
+def _update_baseline(session: "StateStrikeSession", latency_ms: float, window: int) -> None:
+    """Update rolling baseline latency using exponential moving average."""
+    alpha_ema = 2.0 / (window + 1)
+    if session.baseline_sample_count == 0:
+        session.baseline_latency = latency_ms
+    else:
+        session.baseline_latency = alpha_ema * latency_ms + (1 - alpha_ema) * session.baseline_latency
+    session.baseline_sample_count += 1
+def _should_award_chain_bonus(
+    obs: "StateStrikeObservation",
+    session: "StateStrikeSession",
+    constants: "RewardConstants",
+) -> bool:
+    """Determine if the state-chain bonus should be awarded this step."""
+    if obs.action_taken.action_type.value != "get_orders":
+        return False
+    if session.order_count < constants.CHAIN_REQUIRED_ORDERS:
+        return False
+    steps_since_last = obs.step - session.last_chain_bonus_step
+    if steps_since_last < constants.CHAIN_COOLDOWN_STEPS:
+        return False
+    if session.order_count <= session.post_count_at_last_chain:
+        return False
+    return True

statestrike_env/models.py ADDED Viewed

	@@ -0,0 +1,98 @@

+from __future__ import annotations
+"""Typed action, observation, and state models for StateStrike.
+Theory:
+    Explicit state/action schemas reduce ambiguity in RL interfaces and improve
+    reproducibility when evaluating policies across different backends.
+"""
+from enum import Enum
+from typing import Any, Optional
+from pydantic import BaseModel, Field
+class ActionType(str, Enum):
+    """Discrete actions available to the StateStrike agent.
+    Member values are the wire-format strings; the grader compares against them
+    literally (e.g. ``"get_orders"``, ``"post_user"``).
+    """
+    # POST /users with an email payload chosen by the payload strategy.
+    POST_USER = "post_user"
+    # GET /users/{target_user_id}.
+    GET_USER = "get_user"
+    # POST /orders; every such action increments the session's order_count.
+    POST_ORDER = "post_order"
+    # GET /orders?user_id=...; the only action eligible for the chain bonus.
+    GET_ORDERS = "get_orders"
+    # GET /health; also what the server issues on reset.
+    HEALTH_CHECK = "health_check"
+class PayloadStrategy(str, Enum):
+    """Payload generation strategies used by the fuzzing policy.
+    The server turns each strategy into a concrete email (POST /users) or item
+    (POST /orders) string; strategies without a dedicated item payload fall back
+    to the standard item.
+    """
+    # Well-formed payload.
+    VALID = "valid"
+    # Note the wire value is "redos", not "redos_attack".
+    REDOS_ATTACK = "redos"
+    # Very long string payload.
+    OVERSIZED = "oversized"
+    # Syntactically invalid payload ("@@@" email / empty item).
+    MALFORMED = "malformed"
+class StateStrikeAction(BaseModel):
+    """Action frame sent by the RL agent.
+    Args:
+        action_type: Target endpoint operation.
+        payload_strategy: Payload mutation strategy.
+        target_user_id: Optional user identifier override; when omitted (or
+            falsy) the server targets user id 1.
+    """
+    action_type: ActionType
+    payload_strategy: PayloadStrategy
+    target_user_id: Optional[int] = None
+class StateStrikeObservation(BaseModel):
+    """Step-level feedback returned by the environment.
+    Args:
+        step: Current step index within the episode (0 for the reset observation).
+        action_taken: Action executed during the step.
+        http_status: HTTP status code from honeypot response; 0 when the request
+            itself failed.
+        latency_ms: Latency in milliseconds, taken from the honeypot's
+            ``X-Process-Time-Ms`` header when present, otherwise measured by the
+            server; 0.0 when the request failed.
+        reward: Scalar reward at this step.
+        cumulative_reward: Running reward sum for the episode.
+        baseline_latency_ms: Rolling latency baseline used for normalization.
+        order_count: Number of POST /orders calls in this episode.
+        triggered_vulns: Vulnerability labels discovered so far.
+        done: Terminal signal for episode completion.
+        info: Arbitrary metadata, including reward breakdown.
+    """
+    step: int
+    action_taken: StateStrikeAction
+    http_status: int
+    latency_ms: float
+    reward: float
+    cumulative_reward: float
+    baseline_latency_ms: float
+    order_count: int
+    triggered_vulns: list[str]
+    done: bool
+    # default_factory gives each observation its own dict instead of a shared default.
+    info: dict[str, Any] = Field(default_factory=dict)
+class StateStrikeState(BaseModel):
+    """Persistent session state exposed by state().
+    The server obtains this snapshot from the session's ``as_state()`` method.
+    Args:
+        session_id: Unique identifier for current environment episode.
+        step_count: Number of actions executed in current session.
+        cumulative_reward: Running reward sum for current session.
+        order_count: Number of POST /orders calls in session.
+        baseline_latency_ms: Rolling baseline latency in milliseconds.
+        action_history: Most recent action history window.
+        triggered_vulns: Vulnerabilities discovered in this session.
+    """
+    session_id: str
+    step_count: int
+    cumulative_reward: float
+    order_count: int
+    baseline_latency_ms: float
+    action_history: list[StateStrikeAction]
+    triggered_vulns: list[str]

statestrike_env/server.py ADDED Viewed

	@@ -0,0 +1,400 @@

+from __future__ import annotations
+"""OpenEnv-style WebSocket environment server for StateStrike."""
+import asyncio
+import json
+import logging
+import os
+import time
+from contextlib import asynccontextmanager
+from typing import Any
+import httpx
+from dotenv import load_dotenv
+from fastapi import FastAPI, WebSocket, WebSocketDisconnect
+from fastapi.responses import JSONResponse
+try:
+    import openenv_core  # noqa: F401
+except ImportError:  # pragma: no cover - optional import for compatibility signaling.
+    openenv_core = None
+from statestrike_env.constants import (
+    ACTION_TIMEOUT_SECONDS,
+    DEFAULT_BASELINE_LATENCY_MS,
+    EPISODE_LENGTH,
+    RewardConstants,
+)
+from statestrike_env.grader import compute_reward
+from statestrike_env.models import ActionType, PayloadStrategy, StateStrikeAction, StateStrikeObservation, StateStrikeState
+from statestrike_env.session import StateStrikeSession
+load_dotenv()
+logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(name)s | %(levelname)s | %(message)s")
+LOGGER = logging.getLogger(__name__)
+HONEYPOT_URL = os.getenv("HONEYPOT_URL", "http://localhost:8000")
+HOST = os.getenv("STATESTRIKE_ENV_HOST", "0.0.0.0")
+PORT = int(os.getenv("STATESTRIKE_ENV_PORT", "8001"))
+async def wait_for_honeypot(url: str, max_wait: int = 30) -> None:
+    """Block until honeypot is reachable or raise RuntimeError.
+    Args:
+        url: Honeypot base URL.
+        max_wait: Maximum wait time in seconds.
+    Raises:
+        RuntimeError: If honeypot is not reachable before timeout.
+    """
+    deadline = asyncio.get_event_loop().time() + max_wait
+    delay = 1.0
+    async with httpx.AsyncClient() as client:
+        while asyncio.get_event_loop().time() < deadline:
+            try:
+                response = await client.get(f"{url}/health", timeout=3.0)
+                if response.status_code == 200:
+                    LOGGER.info("Honeypot is ready at %s", url)
+                    return
+                LOGGER.warning(
+                    "Honeypot health returned status=%s, retrying in %.1fs...",
+                    response.status_code,
+                    delay,
+                )
+            except Exception as exc:  # noqa: BLE001
+                LOGGER.warning("Honeypot not ready (%s), retrying in %.1fs...", exc, delay)
+            await asyncio.sleep(delay)
+            delay = min(delay * 1.5, 5.0)
+    raise RuntimeError(f"Honeypot at {url} did not become ready within {max_wait}s")
+class StateStrikeEnvironment:
+    """Core reset/step/state implementation.
+    Theory:
+        OpenEnv training loops benefit from persistent transport: WebSocket-based
+        sessions amortize handshake overhead and preserve episode-local state,
+        which aligns with OpenEnv architecture guidance (Burtenshaw, 2025).
+    """
+    def __init__(self, honeypot_url: str, constants: RewardConstants | None = None) -> None:
+        """Initialize environment service.
+        Args:
+            honeypot_url: Base URL for vulnerable honeypot API.
+            constants: Optional reward constants override.
+        """
+        self.honeypot_url = honeypot_url.rstrip("/")
+        self.constants = constants or RewardConstants()
+    async def reset(self, session: StateStrikeSession) -> StateStrikeObservation:
+        """Reset session and return initial observation.
+        Args:
+            session: Session object tied to one client connection.
+        Returns:
+            Initial observation with zero reward.
+        """
+        status, latency_ms, _ = await self._request_honeypot("GET", "/health")
+        baseline = latency_ms if latency_ms > 0 else DEFAULT_BASELINE_LATENCY_MS
+        session.reset(baseline_latency=baseline)
+        action = StateStrikeAction(action_type=ActionType.HEALTH_CHECK, payload_strategy=PayloadStrategy.VALID)
+        obs = StateStrikeObservation(
+            step=0,
+            action_taken=action,
+            http_status=status,
+            latency_ms=latency_ms,
+            reward=0.0,
+            cumulative_reward=0.0,
+            baseline_latency_ms=session.baseline_latency,
+            order_count=0,
+            triggered_vulns=[],
+            done=False,
+            info={"event": "reset"},
+        )
+        return obs
+    async def step(self, session: StateStrikeSession, action: StateStrikeAction) -> StateStrikeObservation:
+        """Execute one environment transition.
+        Args:
+            session: Session object tied to one client connection.
+            action: Agent action.
+        Returns:
+            Updated observation with reward and terminal signal.
+        """
+        request_method, request_path, params, payload = self._translate_action(action, session)
+        status, latency_ms, body = await self._request_honeypot(request_method, request_path, params=params, payload=payload)
+        session.step_count += 1
+        if action.action_type == ActionType.POST_ORDER:
+            session.order_count += 1
+        session.append_action(action)
+        provisional = StateStrikeObservation(
+            step=session.step_count,
+            action_taken=action,
+            http_status=status,
+            latency_ms=latency_ms,
+            reward=0.0,
+            cumulative_reward=session.cumulative_reward,
+            baseline_latency_ms=session.baseline_latency,
+            order_count=session.order_count,
+            triggered_vulns=sorted(session.triggered_vulns),
+            done=False,
+            info={"response": body},
+        )
+        reward, breakdown = compute_reward(provisional, session, self.constants)
+        session.cumulative_reward += reward
+        done = (
+            session.step_count >= EPISODE_LENGTH
+            or session.cumulative_reward < self.constants.EARLY_TERMINATION_REWARD
+        )
+        obs = StateStrikeObservation(
+            step=session.step_count,
+            action_taken=action,
+            http_status=status,
+            latency_ms=latency_ms,
+            reward=reward,
+            cumulative_reward=session.cumulative_reward,
+            baseline_latency_ms=session.baseline_latency,
+            order_count=session.order_count,
+            triggered_vulns=sorted(session.triggered_vulns),
+            done=done,
+            info={"reward_breakdown": breakdown, "response": body},
+        )
+        return obs
+    async def state(self, session: StateStrikeSession) -> StateStrikeState:
+        """Return serializable state snapshot.
+        Args:
+            session: Session object tied to one client connection.
+        Returns:
+            Current state model.
+        """
+        return session.as_state()
+    def _translate_action(
+        self,
+        action: StateStrikeAction,
+        session: StateStrikeSession,
+    ) -> tuple[str, str, dict[str, Any] | None, dict[str, Any] | None]:
+        """Translate action schema into honeypot HTTP request details.
+        Args:
+            action: Agent action.
+            session: Session used for contextual defaults.
+        Returns:
+            Tuple of method, path, query params, and JSON payload.
+        """
+        target_user_id = action.target_user_id or 1
+        if action.action_type == ActionType.POST_USER:
+            email = self._payload_email(action.payload_strategy)
+            return "POST", "/users", None, {"email": email}
+        if action.action_type == ActionType.GET_USER:
+            return "GET", f"/users/{target_user_id}", None, None
+        if action.action_type == ActionType.POST_ORDER:
+            item = self._payload_item(action.payload_strategy)
+            return "POST", "/orders", None, {"user_id": target_user_id, "item": item}
+        if action.action_type == ActionType.GET_ORDERS:
+            return "GET", "/orders", {"user_id": target_user_id}, None
+        return "GET", "/health", None, None
+    @staticmethod
+    def _payload_email(strategy: PayloadStrategy) -> str:
+        """Build email-like payload for POST /users action.
+        Args:
+            strategy: Payload strategy enum.
+        Returns:
+            Strategy-specific string payload.
+        """
+        if strategy == PayloadStrategy.REDOS_ATTACK:
+            return "a" * 39 + "!"
+        if strategy == PayloadStrategy.OVERSIZED:
+            return "A" * 4096
+        if strategy == PayloadStrategy.MALFORMED:
+            return "@@@"
+        return "validuser123"
+    @staticmethod
+    def _payload_item(strategy: PayloadStrategy) -> str:
+        """Build order item payload.
+        Args:
+            strategy: Payload strategy enum.
+        Returns:
+            Strategy-specific order item string.
+        """
+        if strategy == PayloadStrategy.OVERSIZED:
+            return "item_" + ("X" * 2048)
+        if strategy == PayloadStrategy.MALFORMED:
+            return ""
+        return "standard_item"
+    async def _request_honeypot(
+        self,
+        method: str,
+        path: str,
+        *,
+        params: dict[str, Any] | None = None,
+        payload: dict[str, Any] | None = None,
+    ) -> tuple[int, float, dict[str, Any]]:
+        """Execute honeypot request and normalize response metadata.
+        Args:
+            method: HTTP method.
+            path: Relative path.
+            params: Optional query parameters.
+            payload: Optional JSON body.
+        Returns:
+            Tuple of status code, latency milliseconds, and parsed response body.
+        """
+        url = f"{self.honeypot_url}{path}"
+        started = time.perf_counter()
+        try:
+            async with httpx.AsyncClient(timeout=ACTION_TIMEOUT_SECONDS) as client:
+                response = await client.request(method, url, params=params, json=payload)
+            elapsed_ms = (time.perf_counter() - started) * 1000.0
+            header_latency = response.headers.get("X-Process-Time-Ms")
+            latency_ms = float(header_latency) if header_latency else elapsed_ms
+            body = response.json() if response.content else {}
+            return response.status_code, latency_ms, body
+        except (httpx.RequestError, ValueError) as exc:
+            LOGGER.warning("Honeypot request failed method=%s path=%s error=%s", method, path, exc)
+            return 0, 0.0, {"error": str(exc), "synthetic": True}
+@asynccontextmanager
+async def lifespan(_: FastAPI):
+    """Block API startup until honeypot health endpoint is reachable.
+    Waits up to 30 seconds via ``wait_for_honeypot``; its RuntimeError propagates
+    if the honeypot never answers. No work is done after the ``yield``.
+    """
+    await wait_for_honeypot(HONEYPOT_URL, max_wait=30)
+    yield
+# ASGI application; startup is gated by `lifespan`.
+app = FastAPI(title="StateStrike OpenEnv Server", version="1.0.0", lifespan=lifespan)
+# One environment service for the whole process; per-episode state lives in sessions.
+env_service = StateStrikeEnvironment(HONEYPOT_URL)
+# Single session shared by every caller of the HTTP debug routes (/reset, /step, /state).
+http_debug_session = StateStrikeSession.new_session()
+# OpenEnv uses WebSocket (/ws) for persistent sessions rather than
+# stateless HTTP. Each step() is a lightweight frame over an existing
+# connection (~0.1ms overhead vs ~10-50ms TCP handshake per HTTP call).
+# Reference: openenv-course module-5, burtenshaw/openenv-scaling
+# This architecture enables high-frequency RL training loops.
+@app.websocket("/ws")
+async def websocket_env(websocket: WebSocket) -> None:
+    """Run one isolated environment loop per WebSocket client.
+    Args:
+        websocket: Connected client transport.
+    """
+    await websocket.accept()
+    session = StateStrikeSession.new_session()
+    LOGGER.info("WebSocket session started session_id=%s", session.session_id)
+    try:
+        while True:
+            frame = await websocket.receive_text()
+            request = json.loads(frame)
+            method = request.get("method")
+            if method == "reset":
+                obs = await env_service.reset(session)
+                await websocket.send_json({"ok": True, "observation": obs.model_dump()})
+                continue
+            if method == "step":
+                action_payload = request.get("action", {})
+                action = StateStrikeAction.model_validate(action_payload)
+                obs = await env_service.step(session, action)
+                await websocket.send_json({"ok": True, "observation": obs.model_dump()})
+                continue
+            if method == "state":
+                state = await env_service.state(session)
+                await websocket.send_json({"ok": True, "state": state.model_dump()})
+                continue
+            await websocket.send_json({"ok": False, "error": f"Unknown method: {method}"})
+    except (WebSocketDisconnect, json.JSONDecodeError):
+        LOGGER.info("WebSocket session ended session_id=%s", session.session_id)
+@app.get("/reset")
+async def reset_http() -> JSONResponse:
+    """HTTP debug endpoint for reset semantics.
+    Returns:
+        JSON response containing reset observation.
+    """
+    obs = await env_service.reset(http_debug_session)
+    return JSONResponse(obs.model_dump())
+@app.post("/step")
+async def step_http(action: StateStrikeAction) -> JSONResponse:
+    """HTTP debug endpoint for step semantics.
+    Args:
+        action: Action payload.
+    Returns:
+        JSON response containing post-step observation.
+    """
+    obs = await env_service.step(http_debug_session, action)
+    return JSONResponse(obs.model_dump())
+@app.get("/state")
+async def state_http() -> JSONResponse:
+    """HTTP debug endpoint for state semantics.
+    Returns:
+        JSON response containing current session state.
+    """
+    state = await env_service.state(http_debug_session)
+    return JSONResponse(state.model_dump())
+def main() -> None:
+    """Entrypoint for running environment server via python -m."""
+    import uvicorn
+    uvicorn.run("statestrike_env.server:app", host=HOST, port=PORT, reload=False)
+# Start the server only when this module is executed as a script / via ``python -m``.
+if __name__ == "__main__":
+    main()

statestrike_env/session.py ADDED Viewed

	@@ -0,0 +1,122 @@

+from __future__ import annotations
+"""Session state manager for per-agent environment isolation."""
+from dataclasses import dataclass, field
+from uuid import uuid4
+from statestrike_env.constants import DEFAULT_BASELINE_LATENCY_MS, MAX_ACTION_HISTORY
+from statestrike_env.models import StateStrikeAction, StateStrikeState
+@dataclass
+class StateStrikeSession:
+    """Mutable per-WebSocket environment session.
+    Attributes:
+        session_id: Current episode UUID.
+        step_count: Number of steps taken in current episode.
+        cumulative_reward: Running reward total.
+        order_count: Number of POST /orders actions issued.
+        baseline_latency: Rolling average latency used in reward normalization.
+        action_history: Most recent action history window.
+        triggered_vulns: Vulnerabilities discovered in current episode.
+        redos_bounty_awarded: One-time ReDoS bounty guard.
+        db_degradation_bounty_awarded: One-time DB degradation bounty guard.
+        last_chain_bonus_step: Last step where chain bonus was awarded.
+        post_count_at_last_chain: Order count snapshot at last chain award.
+        baseline_sample_count: Number of successful baseline samples seen.
+    """
+    session_id: str
+    step_count: int = 0
+    cumulative_reward: float = 0.0
+    order_count: int = 0
+    baseline_latency: float = DEFAULT_BASELINE_LATENCY_MS
+    action_history: list[StateStrikeAction] = field(default_factory=list)
+    triggered_vulns: set[str] = field(default_factory=set)
+    # Anti-hacking: one-time flags so each bounty fires exactly once per episode.
+    redos_bounty_awarded: bool = False
+    db_degradation_bounty_awarded: bool = False
+    # Anti-hacking: chain bonus can only fire once between meaningful progress windows.
+    last_chain_bonus_step: int = -10
+    post_count_at_last_chain: int = 0
+    # Baseline integrity: updated only on successful (non-zero latency) steps.
+    baseline_sample_count: int = 0
+    @classmethod
+    def new_session(cls) -> StateStrikeSession:
+        """Create a new initialized session.
+        Returns:
+            Newly initialized StateStrikeSession instance.
+        """
+        return cls(session_id=str(uuid4()))
+    def reset(self, baseline_latency: float = DEFAULT_BASELINE_LATENCY_MS) -> None:
+        """Reset session in-place for a new episode.
+        Args:
+            baseline_latency: Fresh baseline latency in milliseconds.
+        """
+        self.session_id = str(uuid4())
+        self.step_count = 0
+        self.cumulative_reward = 0.0
+        self.order_count = 0
+        self.baseline_latency = baseline_latency
+        self.action_history.clear()
+        self.triggered_vulns.clear()
+        self.redos_bounty_awarded = False
+        self.db_degradation_bounty_awarded = False
+        self.last_chain_bonus_step = -10
+        self.post_count_at_last_chain = 0
+        self.baseline_sample_count = 1 if baseline_latency > 0 else 0
+    def record_latency(self, latency_ms: float) -> float:
+        """Update baseline latency using EMA from successful samples.
+        Args:
+            latency_ms: Observed latency for the current step.
+        Returns:
+            Updated baseline latency.
+        """
+        sample = max(latency_ms, 1.0)
+        alpha_ema = 2.0 / (10 + 1)
+        if self.baseline_sample_count == 0:
+            self.baseline_latency = sample
+        else:
+            self.baseline_latency = alpha_ema * sample + (1 - alpha_ema) * self.baseline_latency
+        self.baseline_sample_count += 1
+        return self.baseline_latency
+    def append_action(self, action: StateStrikeAction) -> None:
+        """Append action while enforcing history length constraints.
+        Args:
+            action: Action to append.
+        """
+        self.action_history.append(action)
+        if len(self.action_history) > MAX_ACTION_HISTORY:
+            self.action_history.pop(0)
+    def as_state(self) -> StateStrikeState:
+        """Convert mutable session internals to external state model.
+        Returns:
+            Immutable API-safe state representation.
+        """
+        return StateStrikeState(
+            session_id=self.session_id,
+            step_count=self.step_count,
+            cumulative_reward=self.cumulative_reward,
+            order_count=self.order_count,
+            baseline_latency_ms=self.baseline_latency,
+            action_history=list(self.action_history),
+            triggered_vulns=sorted(self.triggered_vulns),
+        )