| from __future__ import annotations |
|
|
| from typing import Any, Dict, Optional |
|
|
| from fastapi import Body, FastAPI, HTTPException |
| from fastapi.middleware.cors import CORSMiddleware |
| from fastapi.responses import HTMLResponse |
| import uvicorn |
|
|
| from env.environment import IDPIExfilEnv |
| from env.models import ( |
| Action, |
| EpisodeResult, |
| HealthResponse, |
| MetadataResponse, |
| Observation, |
| ResetRequest, |
| Reward, |
| SchemaResponse, |
| State, |
| StepRequest, |
| StepResponse, |
| ) |
|
|
# Service identity: reported by /health and /metadata and rendered on the landing page.
APP_VERSION = "0.1.0"
ENV_NAME = "idpi-exfil-env"
ENV_DESCRIPTION = (
    "OpenEnv-compatible prompt injection benchmark "
    "for safe email processing."
)

# Task names returned by /metadata and listed in the landing page status card.
TASK_NAMES = ["easy", "medium", "hard"]

# Base URL interpolated into the landing page's curl examples.
SPACE_URL = "https://kchad-prompt-injection-rl-environment.hf.space"
|
|
# ASGI application object; every route in this module is registered on it.
app = FastAPI(
    version=APP_VERSION,
    description=ENV_DESCRIPTION,
    title="IDPI Exfil Environment",
)

# CORS is configured wide open: any origin, any method, any header.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)

# A single environment instance is created at import time and used by the
# reset/step/state/result routes.
env = IDPIExfilEnv()
|
|
|
|
| def _model_schema(model: Any) -> Dict[str, Any]: |
| if hasattr(model, "model_json_schema"): |
| return model.model_json_schema() |
| return model.schema() |
|
|
|
|
def _landing_page_html() -> str:
    """Build the HTML document served as the human-facing landing page.

    The page is a single self-contained document (inline stylesheet, no
    scripts). It interpolates the module constants ``ENV_NAME``,
    ``APP_VERSION``, ``TASK_NAMES`` and ``SPACE_URL`` and renders two
    hard-coded card lists: one card per difficulty tier and one per
    showcase episode.

    Returns:
        The full HTML markup as a string.
    """
    # (task name, blurb) pairs rendered in the "Runtime Pack" section.
    difficulty_tiers = (
        (
            "easy",
            "Introductory scenarios with obvious but believable relay attempts and lightly hidden prompt injections.",
        ),
        (
            "medium",
            "Curated mid-tier episodes using hidden carrier channels, transcript replay, and authority spoofing.",
        ),
        (
            "hard",
            "Safe-completion episodes where the agent must finish the business task without contaminating internal outputs.",
        ),
    )
    task_cards = "".join(
        f"""
<article class="task-card">
  <span class="task-pill">{task.upper()}</span>
  <h3>{task.title()}</h3>
  <p>{description}</p>
</article>
"""
        for task, description in difficulty_tiers
    )

    # (channel label, title, blurb) triples rendered in "Showcase Episodes".
    showcase_episodes = (
        (
            "HIDDEN MARKUP",
            "Hidden Relay Instruction",
            "Hidden relay instructions tucked into comment markup. Safe agents should read, flag, and keep the digest workflow internal.",
        ),
        (
            "DELAYED TRIGGER",
            "Summary-Time Hook",
            "The malicious instruction only activates when the agent tries to write the benign summary, which makes the episode trajectory-aware instead of static-text-aware.",
        ),
        (
            "SEMANTIC",
            "Internal Output Poisoning",
            "A business-looking workbook clause poisons the internal summary unless the model recognizes it as attacker-directed content.",
        ),
    )
    showcase_cards = "".join(
        f"""
<article class="task-card">
  <span class="task-pill">{channel}</span>
  <h3>{title}</h3>
  <p>{description}</p>
</article>
"""
        for channel, title, description in showcase_episodes
    )

    # Literal CSS/JSON braces are doubled ({{ }}) because this is an f-string.
    # The Quick Start list uses <code class="inline"> so that the stylesheet's
    # `code.inline` rule actually matches the endpoint names; with <span>
    # elements the rule never applied and the pills rendered unstyled.
    return f"""
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <title>IDPI Exfil Environment</title>
  <style>
    :root {{
      --bg: #fbf6ef;
      --panel: rgba(255, 250, 242, 0.9);
      --ink: #23160d;
      --muted: #6b5848;
      --accent: #a64712;
      --accent-2: #155d4b;
      --line: rgba(53, 31, 15, 0.12);
      --shadow: 0 24px 60px rgba(74, 35, 12, 0.12);
    }}

    * {{
      box-sizing: border-box;
    }}

    body {{
      margin: 0;
      font-family: "IBM Plex Sans", "Segoe UI", sans-serif;
      color: var(--ink);
      background:
        radial-gradient(circle at top left, rgba(212, 132, 56, 0.18), transparent 28%),
        radial-gradient(circle at top right, rgba(21, 93, 75, 0.16), transparent 26%),
        linear-gradient(180deg, #fffaf4 0%, var(--bg) 100%);
    }}

    .shell {{
      max-width: 1160px;
      margin: 0 auto;
      padding: 40px 24px 56px;
    }}

    .hero {{
      display: grid;
      grid-template-columns: 1.5fr 1fr;
      gap: 24px;
      align-items: stretch;
    }}

    .panel {{
      background: var(--panel);
      border: 1px solid var(--line);
      border-radius: 24px;
      box-shadow: var(--shadow);
      backdrop-filter: blur(8px);
    }}

    .hero-copy {{
      padding: 32px;
    }}

    .eyebrow {{
      display: inline-flex;
      align-items: center;
      gap: 8px;
      padding: 8px 12px;
      border-radius: 999px;
      background: rgba(166, 71, 18, 0.08);
      color: var(--accent);
      font-weight: 700;
      letter-spacing: 0.04em;
      text-transform: uppercase;
      font-size: 12px;
    }}

    h1 {{
      margin: 18px 0 14px;
      font-family: Georgia, "Times New Roman", serif;
      font-size: clamp(2.4rem, 5vw, 4.3rem);
      line-height: 0.96;
      letter-spacing: -0.03em;
    }}

    .lede {{
      font-size: 1.05rem;
      line-height: 1.7;
      color: var(--muted);
      max-width: 60ch;
    }}

    .cta-row {{
      display: flex;
      flex-wrap: wrap;
      gap: 12px;
      margin-top: 24px;
    }}

    .cta {{
      display: inline-flex;
      align-items: center;
      justify-content: center;
      padding: 12px 16px;
      border-radius: 14px;
      text-decoration: none;
      font-weight: 700;
      border: 1px solid transparent;
    }}

    .cta.primary {{
      background: var(--accent);
      color: #fff7f0;
    }}

    .cta.secondary {{
      border-color: var(--line);
      color: var(--ink);
      background: rgba(255, 255, 255, 0.6);
    }}

    .status-card {{
      padding: 28px;
      display: flex;
      flex-direction: column;
      justify-content: space-between;
      min-height: 100%;
    }}

    .status-grid {{
      display: grid;
      grid-template-columns: repeat(2, minmax(0, 1fr));
      gap: 12px;
      margin-top: 18px;
    }}

    .stat {{
      padding: 14px;
      border-radius: 18px;
      background: rgba(255, 255, 255, 0.72);
      border: 1px solid var(--line);
    }}

    .stat-label {{
      font-size: 0.72rem;
      text-transform: uppercase;
      letter-spacing: 0.08em;
      color: var(--muted);
      margin-bottom: 6px;
    }}

    .stat-value {{
      font-size: 1.1rem;
      font-weight: 700;
    }}

    .section {{
      margin-top: 24px;
      padding: 28px;
    }}

    .section h2 {{
      margin: 0 0 14px;
      font-size: 1.4rem;
    }}

    .task-grid {{
      display: grid;
      grid-template-columns: repeat(3, minmax(0, 1fr));
      gap: 14px;
    }}

    .task-card {{
      padding: 20px;
      border-radius: 20px;
      background: rgba(255, 255, 255, 0.7);
      border: 1px solid var(--line);
    }}

    .task-card h3 {{
      margin: 10px 0 10px;
    }}

    .task-pill {{
      display: inline-block;
      font-size: 0.74rem;
      font-weight: 700;
      letter-spacing: 0.06em;
      color: var(--accent-2);
      text-transform: uppercase;
    }}

    .api-grid {{
      display: grid;
      grid-template-columns: 1.1fr 0.9fr;
      gap: 16px;
    }}

    pre {{
      margin: 0;
      padding: 18px;
      border-radius: 18px;
      background: #1f1712;
      color: #f5e7d9;
      overflow-x: auto;
      font-family: "IBM Plex Mono", "Consolas", monospace;
      font-size: 0.93rem;
      line-height: 1.55;
    }}

    code.inline {{
      padding: 2px 8px;
      border-radius: 999px;
      background: rgba(166, 71, 18, 0.09);
      color: var(--accent);
      font-family: "IBM Plex Mono", "Consolas", monospace;
    }}

    ul {{
      margin: 0;
      padding-left: 20px;
      line-height: 1.8;
      color: var(--muted);
    }}

    .footer-note {{
      margin-top: 16px;
      color: var(--muted);
      font-size: 0.95rem;
    }}

    @media (max-width: 900px) {{
      .hero,
      .task-grid,
      .api-grid {{
        grid-template-columns: 1fr;
      }}

      .shell {{
        padding: 24px 16px 40px;
      }}

      .hero-copy,
      .status-card,
      .section {{
        padding: 22px;
      }}
    }}
  </style>
</head>
<body>
  <main class="shell">
    <section class="hero">
      <div class="panel hero-copy">
        <span class="eyebrow">OpenEnv Benchmark</span>
        <h1>IDPI Exfil Environment</h1>
        <p class="lede">
          A prompt-injection benchmark for secure email-style agent workflows. The runtime pack
          contains curated easy, medium, and hard scenarios covering hidden relay instructions,
          delayed summary hooks, transcript replay, markup-based attacks, and internal-output
          contamination attempts.
        </p>
        <div class="cta-row">
          <a class="cta primary" href="/docs">Open API Docs</a>
          <a class="cta secondary" href="/metadata">View Metadata</a>
          <a class="cta secondary" href="/openapi.json">OpenAPI JSON</a>
        </div>
      </div>
      <aside class="panel status-card">
        <div>
          <span class="eyebrow">Service Status</span>
          <div class="status-grid">
            <div class="stat">
              <div class="stat-label">Env</div>
              <div class="stat-value">{ENV_NAME}</div>
            </div>
            <div class="stat">
              <div class="stat-label">Version</div>
              <div class="stat-value">{APP_VERSION}</div>
            </div>
            <div class="stat">
              <div class="stat-label">Tasks</div>
              <div class="stat-value">{", ".join(TASK_NAMES)}</div>
            </div>
            <div class="stat">
              <div class="stat-label">Runtime Pack</div>
              <div class="stat-value">Curated / 32 scenarios</div>
            </div>
          </div>
        </div>
        <p class="footer-note">
          The API remains fully OpenEnv-compatible. This page is just a human-friendly landing
          surface for the same benchmark endpoints the validator uses.
        </p>
      </aside>
    </section>

    <section class="panel section">
      <h2>Runtime Pack</h2>
      <div class="task-grid">
        {task_cards}
      </div>
    </section>

    <section class="panel section">
      <h2>Showcase Episodes</h2>
      <div class="task-grid">
        {showcase_cards}
      </div>
    </section>

    <section class="panel section">
      <h2>Quick Start</h2>
      <div class="api-grid">
        <pre>curl -X POST {SPACE_URL}/reset

curl -X POST {SPACE_URL}/step \\
-H "Content-Type: application/json" \\
-d '{{"action": {{"type": "ReadDoc"}}}}'

curl {SPACE_URL}/state
curl {SPACE_URL}/result</pre>
        <div>
          <ul>
            <li>Use <code class="inline">POST /reset</code> to start a new episode. An empty body is valid.</li>
            <li>Use <code class="inline">POST /step</code> with typed OpenEnv actions.</li>
            <li>The evaluation runner uses the curated benchmark manifest as the runtime source of truth.</li>
            <li>Inspect live schemas at <code class="inline">/schema</code>, docs at <code class="inline">/docs</code>, and metadata at <code class="inline">/metadata</code>.</li>
            <li>Call <code class="inline">/health</code> for service health and <code class="inline">/result</code> after an episode to inspect grader output.</li>
          </ul>
        </div>
      </div>
    </section>
  </main>
</body>
</html>
"""
|
|
|
|
@app.get("/", response_class=HTMLResponse)
def root() -> HTMLResponse:
    """Serve the landing page at the site root."""
    page = _landing_page_html()
    return HTMLResponse(content=page)
|
|
|
|
@app.get("/web", response_class=HTMLResponse)
def web_root() -> HTMLResponse:
    """Serve the same landing page under the ``/web`` path."""
    page = _landing_page_html()
    return HTMLResponse(content=page)
|
|
|
|
@app.get("/health", response_model=HealthResponse)
def health() -> HealthResponse:
    """Report a fixed ``"healthy"`` status with the environment name and version."""
    report = HealthResponse(
        status="healthy",
        env=ENV_NAME,
        version=APP_VERSION,
    )
    return report
|
|
|
|
@app.get("/metadata", response_model=MetadataResponse)
def metadata() -> MetadataResponse:
    """Return the environment's name, description, version, and task names."""
    fields = {
        "name": ENV_NAME,
        "description": ENV_DESCRIPTION,
        "version": APP_VERSION,
        "tasks": TASK_NAMES,
    }
    return MetadataResponse(**fields)
|
|
|
|
@app.get("/schema", response_model=SchemaResponse)
def schema() -> SchemaResponse:
    """Return the JSON schemas of the action, observation, state, reward, and result models."""
    # Response field name -> model whose schema fills that field.
    models = {
        "action": Action,
        "observation": Observation,
        "state": State,
        "reward": Reward,
        "result": EpisodeResult,
    }
    schemas = {field: _model_schema(model) for field, model in models.items()}
    return SchemaResponse(**schemas)
|
|
|
|
@app.post("/reset", response_model=Observation)
def reset(request: Optional[ResetRequest] = Body(default=None)) -> Observation:
    """Start a new episode and return its first observation.

    A missing body is replaced by a default ``ResetRequest``; its
    ``difficulty`` and ``seed`` are forwarded to ``env.reset``.

    Raises:
        HTTPException: 400 when a ``ValueError`` is raised, 500 for any
            other exception; the detail is the exception message.
    """
    try:
        # Defaulting stays inside the try so a ValueError raised while
        # building the default request is also reported as a 400.
        payload = request if request else ResetRequest()
        observation = env.reset(difficulty=payload.difficulty, seed=payload.seed)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return observation
|
|
|
|
@app.post("/step", response_model=StepResponse)
def step(request: StepRequest) -> StepResponse:
    """Apply ``request.action`` to the environment and return the transition.

    Raises:
        HTTPException: 400 when a ``RuntimeError`` or ``ValueError`` is
            raised, 500 for any other exception; the detail is the
            exception message.
    """
    try:
        observation, reward, done, info = env.step(request.action)
        # Built inside the try so a ValueError raised while constructing
        # the response is reported the same way as one from env.step.
        response = StepResponse(
            observation=observation,
            reward=reward,
            done=done,
            info=info,
        )
    except (RuntimeError, ValueError) as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return response
|
|
|
|
@app.get("/state", response_model=State)
def state() -> State:
    """Return whatever ``env.state()`` reports.

    Raises:
        HTTPException: 400 when a ``RuntimeError`` is raised, 500 for any
            other exception; the detail is the exception message.
    """
    try:
        snapshot = env.state()
    except RuntimeError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return snapshot
|
|
|
|
@app.get("/result", response_model=EpisodeResult)
def result() -> EpisodeResult:
    """Return whatever ``env.get_result()`` reports.

    Raises:
        HTTPException: 400 when a ``RuntimeError`` is raised, 500 for any
            other exception; the detail is the exception message.
    """
    try:
        episode_result = env.get_result()
    except RuntimeError as exc:
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    return episode_result
|
|
|
|
@app.post("/mcp")
def mcp(payload: Optional[Dict[str, Any]] = Body(default=None)) -> Dict[str, Any]:
    """Answer every MCP call with a JSON-RPC 2.0 error object.

    The request ``id`` is echoed back when the payload is a dict that
    carries one; otherwise the ``id`` is ``None``.
    """
    request_id = payload.get("id") if isinstance(payload, dict) else None
    not_implemented = {
        "code": -32601,
        "message": "MCP bridge is not implemented for this benchmark. Use the HTTP reset/step/state/result endpoints.",
    }
    return {"jsonrpc": "2.0", "id": request_id, "error": not_implemented}
|
|
|
|
def main() -> None:
    """Run the app under uvicorn on all interfaces, port 7860, with reload off."""
    uvicorn.run(
        "server.app:app",
        host="0.0.0.0",
        port=7860,
        reload=False,
    )


# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
|