Spaces:

sh4shv4t
/

statestrike-env

Sleeping

File size: 11,157 Bytes

"""
StateStrike Inference Script
============================
Runs an LLM agent against all 3 StateStrike tasks and emits
structured [START]/[STEP]/[END] logs for automated scoring.

Environment variables:
  API_BASE_URL      LLM endpoint (default: https://router.huggingface.co/v1)
  MODEL_NAME        Model identifier (default: Qwen/Qwen2.5-72B-Instruct)
  HF_TOKEN          Hugging Face API token (required, no default)
  LOCAL_IMAGE_NAME  Docker image name if using from_docker_image()
"""

from __future__ import annotations

import asyncio
import json
import os
import re
import textwrap
from typing import List, Optional

from openai import OpenAI

from statestrike_env.environment import StateStrikeEnv
from statestrike_env.models import EndpointChoice, PayloadStrategy, StateStrikeAction

API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME")
BENCHMARK = "statestrike"


def log_start(task: str, env: str, model: str) -> None:
    print(f"[START] task={task} env={env} model={model}", flush=True)


def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
    error_val = error if error else "null"
    print(f"[STEP] step={step} action={action} reward={reward:.2f} done={str(done).lower()} error={error_val}", flush=True)


def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
    rewards_str = ",".join(f"{r:.2f}" for r in rewards)
    print(f"[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}", flush=True)


TASK_CONFIGS = {
    "endpoint_discovery": {"max_steps": 10, "success_threshold": 0.6},
    "vulnerability_probe": {"max_steps": 15, "success_threshold": 0.5},
    "exploit_chain": {"max_steps": 30, "success_threshold": 0.75},
}


SYSTEM_PROMPTS = {
    "endpoint_discovery": textwrap.dedent(
        """
        You are a security agent discovering API endpoints. Try each endpoint ONCE.
        VALID endpoint values (use EXACTLY as written):
          "POST /users", "GET /users/{id}", "POST /orders", "GET /orders", "GET /health"
        VALID payload_strategy values: "valid", "redos", "oversized", "malformed"
        Reply with ONLY a JSON object like:
        {"endpoint": "POST /users", "payload_strategy": "valid", "target_user_id": null}
        Do NOT repeat endpoints you have already tried.
        """
    ).strip(),
    "vulnerability_probe": textwrap.dedent(
        """
        You are a security agent finding API vulnerabilities.
        VALID endpoint values (use EXACTLY):
          "POST /users", "GET /users/{id}", "POST /orders", "GET /orders", "GET /health"
        VALID payload_strategy values: "valid", "redos", "oversized", "malformed"
        Two vulnerabilities:
        1. ReDoS: call "POST /users" with payload_strategy "redos"
        2. DB slow path: first call "POST /orders" 21 times with "valid", then "GET /orders"
        Once you have confirmed ReDoS (latency > 500ms on POST /users redos), switch to building orders: do POST /orders 21 times, then GET /orders.
        Reply with ONLY JSON: {"endpoint": "POST /users", "payload_strategy": "redos", "target_user_id": null}
        """
    ).strip(),
    "exploit_chain": textwrap.dedent(
        """
        You are a security agent executing a 4-step exploit chain.
        VALID endpoint values (use EXACTLY):
          "POST /users", "GET /users/{id}", "POST /orders", "GET /orders", "GET /health"
        VALID payload_strategy values: "valid", "redos", "oversized", "malformed"
        Follow this EXACT sequence:
          Step 1: {"endpoint": "POST /users", "payload_strategy": "valid", "target_user_id": null}
          Steps 2-22: {"endpoint": "POST /orders", "payload_strategy": "valid", "target_user_id": <id from step 1>}
          Step 23: {"endpoint": "GET /orders", "payload_strategy": "valid", "target_user_id": <same id>}
          Step 24+: {"endpoint": "POST /users", "payload_strategy": "redos", "target_user_id": null}
        The observation tells you the current order_count and user_created status.
        Reply with ONLY the JSON for your NEXT action.
        """
    ).strip(),
}

ENDPOINT_ALIASES = {
    "GET /users/{user_id}": "GET /users/{id}",
    "GET /users/:id": "GET /users/{id}",
    "GET /user": "GET /users/{id}",
}

STRATEGY_ALIASES = {
    "none": "valid",
    "attack": "redos",
    "normal": "valid",
    "invalid": "malformed",
}


def build_user_prompt(step: int, last_obs: dict, history: list[str], task_name: str) -> str:
    history_block = "\n".join(history[-4:]) if history else "None"

    order_count = last_obs.get("session_order_count", 0)
    endpoints_found = last_obs.get("endpoints_discovered", [])
    vulns_found = last_obs.get("vulnerabilities_found", [])
    task_progress = last_obs.get("task_progress", 0.0)

    guidance = ""
    if task_name == "endpoint_discovery":
        remaining = [
            e
            for e in [
                "POST /users",
                "GET /users/{id}",
                "POST /orders",
                "GET /orders",
                "GET /health",
            ]
            if e not in endpoints_found
        ]
        guidance = f"Endpoints not yet tried: {remaining}"
    elif task_name == "vulnerability_probe":
        guidance = f"Vulns found so far: {vulns_found}. Order count: {order_count}."
    elif task_name == "exploit_chain":
        guidance = (
            f"order_count={order_count}/21, "
            f"user_created={'POST /users' in endpoints_found}, "
            f"vulns={vulns_found}. "
            f"If order_count < 21, keep doing POST /orders. "
            f"If order_count >= 21 and 'db_degradation' not in vulns, do GET /orders. "
            f"If 'redos' not in vulns, do POST /users with redos."
        )

    return textwrap.dedent(
        f"""
        Step: {step}
        Task progress: {task_progress:.1%}
        {guidance}
        Last response: status={last_obs.get('http_status')} latency={last_obs.get('latency_ms', 0):.0f}ms
        History:
        {history_block}
        What is your next action? Reply with JSON only.
        """
    ).strip()


def _normalize_action_data(data: dict, task_name: str, created_user_id: int | None) -> dict:
    endpoint = str(data.get("endpoint", "")).strip()
    if re.fullmatch(r"GET\s+/users/\d+", endpoint):
        endpoint = "GET /users/{id}"
    endpoint = ENDPOINT_ALIASES.get(endpoint, endpoint)
    if endpoint:
        data["endpoint"] = endpoint

    strategy = str(data.get("payload_strategy", "")).strip().lower()
    if strategy:
        data["payload_strategy"] = STRATEGY_ALIASES.get(strategy, strategy)

    if task_name == "exploit_chain" and created_user_id:
        ep = str(data.get("endpoint", ""))
        if "orders" in ep.lower():
            data["target_user_id"] = created_user_id

    return data


def get_agent_action(
    client: OpenAI,
    task_name: str,
    step: int,
    last_obs: dict,
    history: List[str],
    created_user_id: int | None = None,
) -> StateStrikeAction:
    system = SYSTEM_PROMPTS[task_name]
    user_msg = build_user_prompt(step=step, last_obs=last_obs, history=history, task_name=task_name)

    fallback = StateStrikeAction(
        endpoint=EndpointChoice.HEALTH,
        payload_strategy=PayloadStrategy.VALID,
    )

    try:
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": user_msg},
            ],
            temperature=0.7,
            max_tokens=100,
        )
        text = (completion.choices[0].message.content or "").strip()
        text = text.removeprefix("```json").removeprefix("```").removesuffix("```").strip()
        data = json.loads(text)
        data = _normalize_action_data(data, task_name=task_name, created_user_id=created_user_id)
        return StateStrikeAction(**data)
    except Exception as exc:
        print(f"[DEBUG] Action parse failed: {exc}", flush=True)
        return fallback


async def run_task(
    env: StateStrikeEnv,
    client: OpenAI,
    task_name: str,
) -> float:
    config = TASK_CONFIGS[task_name]
    max_steps = config["max_steps"]
    success_threshold = config["success_threshold"]

    rewards: List[float] = []
    steps_taken = 0
    score = 0.0
    success = False
    history: List[str] = []
    created_user_id: int | None = None

    log_start(task=task_name, env=BENCHMARK, model=MODEL_NAME)

    try:
        result = await env.reset(task_name=task_name)
        obs = result.observation
        last_obs_dict = obs.model_dump()

        for step in range(1, max_steps + 1):
            if result.done:
                break

            action = get_agent_action(
                client,
                task_name,
                step,
                last_obs_dict,
                history,
                created_user_id=created_user_id,
            )
            action_str = f"{action.endpoint}+{action.payload_strategy}"

            result = await env.step(action)
            obs = result.observation
            reward = result.reward or 0.0
            done = result.done
            error = result.info.get("error") if isinstance(result.info, dict) else None

            rewards.append(reward)
            steps_taken = step
            last_obs_dict = obs.model_dump()

            if task_name == "exploit_chain":
                body = obs.response_body or {}
                maybe_id = body.get("id") if isinstance(body, dict) else None
                if isinstance(maybe_id, int) and created_user_id is None:
                    created_user_id = maybe_id

            log_step(step=step, action=action_str, reward=reward, done=done, error=error)
            history.append(
                f"Step {step}: {action_str} -> status={obs.http_status} "
                f"latency={obs.latency_ms:.0f}ms reward={reward:.2f}"
            )

            if done:
                break

        score = min(max(obs.task_progress, 0.0), 1.0)
        success = score >= success_threshold

    except Exception as exc:
        print(f"[DEBUG] Task {task_name} failed: {exc}", flush=True)
    finally:
        log_end(success=success, steps=steps_taken, score=score, rewards=rewards)

    return score


async def main() -> None:
    client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)

    if LOCAL_IMAGE_NAME:
        env = await StateStrikeEnv.from_docker_image(LOCAL_IMAGE_NAME)
    else:
        env = StateStrikeEnv()

    scores = {}
    for task_name in ["endpoint_discovery", "vulnerability_probe", "exploit_chain"]:
        score = await run_task(env, client, task_name)
        scores[task_name] = score

    await env.close()

    print(f"\n[DEBUG] Final scores: {scores}", flush=True)
    avg = sum(scores.values()) / len(scores)
    print(f"[DEBUG] Average score: {avg:.3f}", flush=True)


if __name__ == "__main__":
    asyncio.run(main())