"""
FastAPI server exposing the Content Moderation Queue OpenEnv environment.

Each call to /reset creates an isolated session with its own state.
Pass the returned session_id to /step and /state to avoid interference
between concurrent users or test runs.
"""

import threading
import uuid
from typing import Dict, Optional

from fastapi import FastAPI, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse

from environment import ContentModerationEnv
from environment.models import Action, Observation, StepResult, EnvironmentState

# ═══════════════════════════════════════════════════════════════════════════
# Swagger CSS — warm cozy theme
# ═══════════════════════════════════════════════════════════════════════════
SWAGGER_CSS = """ body { background: #fdf0dc !important; } .swagger-ui .topbar { background: linear-gradient(135deg, #f4a833, #e8923a) !important; border-bottom: 2px solid #e8923a !important; padding: 10px 0 !important; } .swagger-ui .topbar a { color: #fff !important; } .swagger-ui .topbar .download-url-wrapper .select-label select { border-color: rgba(255,255,255,0.3) !important; color: #fff !important; } .swagger-ui { color: #5a4530 !important; font-family: 'Inter', system-ui, sans-serif !important; } .swagger-ui .info .title { color: #3d2b1a !important; font-weight: 700 !important; } .swagger-ui .info .description p { color: #7a6550 !important; } .swagger-ui .info .title small.version-stamp { background: #f4a833 !important; color: #fff !important; border: none !important; border-radius: 12px !important; } .swagger-ui .scheme-container { background: #fbe8c8 !important; border: 1px solid #f0d4a0 !important; border-radius: 14px !important; box-shadow: 0 2px 8px rgba(180,140,80,0.08) !important; } /* Operation blocks */ .swagger-ui .opblock { border-radius: 14px !important; box-shadow: 0 2px 10px rgba(180,140,80,0.08) !important; margin-bottom: 14px !important; overflow: hidden !important; } .swagger-ui .opblock
.opblock-summary { border: none !important; } .swagger-ui .opblock .opblock-summary-method { border-radius: 8px !important; font-weight: 700 !important; } .swagger-ui .opblock .opblock-summary-description { color: #7a6550 !important; } .swagger-ui .opblock .opblock-summary-path { color: #3d2b1a !important; } /* GET blocks — warm teal */ .swagger-ui .opblock-get { background: #fef9f0 !important; border: 1.5px solid #b8d8c8 !important; } .swagger-ui .opblock-get .opblock-summary-method { background: #5baa8a !important; color: #fff !important; } .swagger-ui .opblock-get .opblock-summary { border-color: transparent !important; } /* POST blocks — warm orange */ .swagger-ui .opblock-post { background: #fef6ed !important; border: 1.5px solid #f0c880 !important; } .swagger-ui .opblock-post .opblock-summary-method { background: #f4a833 !important; color: #fff !important; } .swagger-ui .opblock-post .opblock-summary { border-color: transparent !important; } /* Body */ .swagger-ui .opblock-body { background: #fdf5e8 !important; } .swagger-ui .opblock-body pre { background: #fef9f0 !important; color: #5a4530 !important; border: 1px solid #f0d4a0 !important; border-radius: 10px !important; } .swagger-ui .opblock-description-wrapper p { color: #7a6550 !important; } /* Tables */ .swagger-ui table thead tr td, .swagger-ui table thead tr th { color: #7a6550 !important; border-color: #f0d4a0 !important; } .swagger-ui table tbody tr td { color: #5a4530 !important; border-color: #f5e0c0 !important; } /* Parameters */ .swagger-ui .parameter__name { color: #3d2b1a !important; } .swagger-ui .parameter__type { color: #5baa8a !important; } .swagger-ui .parameter__name.required::after { color: #e86040 !important; } .swagger-ui .parameters-col_description p { color: #7a6550 !important; } /* Inputs */ .swagger-ui input[type=text], .swagger-ui textarea, .swagger-ui select { background: #fef9f0 !important; color: #3d2b1a !important; border: 1.5px solid #f0d4a0 !important; border-radius: 10px
!important; font-family: 'JetBrains Mono', monospace !important; } .swagger-ui input[type=text]:focus, .swagger-ui textarea:focus { border-color: #f4a833 !important; box-shadow: 0 0 0 3px rgba(244,168,51,0.15) !important; } /* Execute button */ .swagger-ui .btn.execute { background: linear-gradient(135deg, #f4a833, #e8923a) !important; color: #fff !important; border: none !important; border-radius: 10px !important; box-shadow: 0 3px 12px rgba(244,168,51,0.3) !important; font-weight: 600 !important; padding: 8px 24px !important; } .swagger-ui .btn.execute:hover { box-shadow: 0 5px 20px rgba(244,168,51,0.4) !important; transform: translateY(-1px); } /* Try-out button */ .swagger-ui .try-out__btn { color: #f4a833 !important; border-color: #f0c880 !important; border-radius: 10px !important; } .swagger-ui .try-out__btn:hover { background: rgba(244,168,51,0.08) !important; } /* Cancel */ .swagger-ui .btn-group .cancel { color: #7a6550 !important; border-color: #e0c8a0 !important; } /* Responses */ .swagger-ui .responses-inner { background: transparent !important; } .swagger-ui .response-col_status { color: #5baa8a !important; font-weight: 600 !important; } .swagger-ui .response-col_description { color: #7a6550 !important; } /* Live response */ .swagger-ui .microlight { background: #fef9f0 !important; color: #5a4530 !important; border-radius: 10px !important; border: 1px solid #f0d4a0 !important; } /* Models */ .swagger-ui section.models { border: 1.5px solid #f0d4a0 !important; border-radius: 14px !important; background: #fef6ed !important; } .swagger-ui section.models h4 { color: #3d2b1a !important; border-color: #f0d4a0 !important; } .swagger-ui .model-title { color: #3d2b1a !important; } .swagger-ui .model { color: #5a4530 !important; } .swagger-ui .model .property { color: #7a6550 !important; } .swagger-ui .model .property.primitive { color: #5baa8a !important; } .swagger-ui .prop-type { color: #c47830 !important; } .swagger-ui .model-box { background: #fdf5e8
!important; border-radius: 10px !important; } .swagger-ui section.models .model-container { background: #fdf5e8 !important; border-radius: 10px !important; margin: 4px 0 !important; } /* Links */ .swagger-ui a { color: #e08030 !important; } .swagger-ui a:hover { color: #c06020 !important; } /* Section tags */ .swagger-ui .opblock-tag { color: #3d2b1a !important; border-color: #f0d4a0 !important; } /* Expand arrows */ .swagger-ui .expand-operation svg, .swagger-ui .expand-methods svg { fill: #c4a070 !important; } /* Markdown */ .swagger-ui .markdown p, .swagger-ui .renderedMarkdown p { color: #7a6550 !important; } .swagger-ui .markdown li, .swagger-ui .renderedMarkdown li { color: #7a6550 !important; } .swagger-ui .markdown code { background: #fbe8c8 !important; color: #c47830 !important; border-radius: 6px !important; padding: 1px 6px !important; } /* Scrollbar */ ::-webkit-scrollbar { width: 6px; } ::-webkit-scrollbar-track { background: #fdf0dc; } ::-webkit-scrollbar-thumb { background: #e0c8a0; border-radius: 3px; } """

# ═══════════════════════════════════════════════════════════════════════════
# App setup
# ═══════════════════════════════════════════════════════════════════════════
app = FastAPI(
    title="Content Moderation Queue — OpenEnv",
    description=(
        "A real-world content moderation environment where AI agents learn "
        "to triage social media posts using a tiered policy framework. "
        "Implements the full OpenEnv spec: step() / reset() / state(). "
        "Each /reset call creates an isolated session — pass session_id to /step and /state."
    ),
    version="1.0.0",
    # The built-in docs route is disabled; custom_docs() below serves /docs.
    docs_url=None,
)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# Live sessions keyed by session_id. Dicts preserve insertion order, so the
# first key is always the oldest session and is the one evicted at capacity.
_sessions: Dict[str, ContentModerationEnv] = {}
MAX_SESSIONS = 200

# The endpoints below are plain `def` functions, which FastAPI executes in a
# threadpool, so every access to `_sessions` goes through this lock.
_sessions_lock = threading.Lock()

# Stateless helper instance used only to answer /tasks.
_shared_env = ContentModerationEnv()


def _get_session(session_id: str) -> ContentModerationEnv:
    """Return the environment registered under *session_id*.

    Raises:
        HTTPException: 404 when no such session exists (never created, or
            already evicted because MAX_SESSIONS was reached).
    """
    with _sessions_lock:
        env = _sessions.get(session_id)
    if env is None:
        raise HTTPException(
            status_code=404,
            detail=f"Session '{session_id}' not found. Call POST /reset first to create a session.",
        )
    return env


def _generate_session_id() -> str:
    """Return a fresh 8-character hex id that is not currently registered.

    Ids are short, so a collision with a live session is possible; retrying
    here prevents a new session from silently overwriting someone else's.
    """
    with _sessions_lock:
        while True:
            sid = uuid.uuid4().hex[:8]
            if sid not in _sessions:
                return sid


def _register_session(sid: str, env: ContentModerationEnv) -> None:
    """Store *env* under *sid*, evicting the oldest sessions at capacity."""
    with _sessions_lock:
        while len(_sessions) >= MAX_SESSIONS:
            del _sessions[next(iter(_sessions))]
        _sessions[sid] = env


def _new_session() -> tuple[str, ContentModerationEnv]:
    """Create, register and return a brand-new ``(session_id, env)`` pair."""
    sid = _generate_session_id()
    env = ContentModerationEnv()
    _register_session(sid, env)
    return sid, env


# ═══════════════════════════════════════════════════════════════════════════
# Landing page — warm cozy theme
# ═══════════════════════════════════════════════════════════════════════════
LANDING_HTML = """ Content Moderation Queue — OpenEnv
🛡️

Content Moderation Queue

OpenEnv Live v1.0.0

A real-world RL environment simulating Trust & Safety moderation. AI agents triage social media posts, handle appeals, detect crisis content, and apply graduated policy enforcement.

30
Labeled Posts
3
Difficulty Levels
6
Action Types
9
Violation Types
How It Works
1
Reset
Start episode, pick difficulty
POST /reset
2
Observe
Read post, history, context
session_id
3
Decide
Choose action + violation
POST /step
4
Score
Get reward 0.0 - 1.0
GET /state
Tasks
Easy
Binary Content Moderation
Classify 8 posts as safe or harmful. Clear signals: spam, hate speech vs everyday content.
8 posts2 actionsBinary
Medium
Tiered Policy Enforcement
Apply 5-level severity. Harsh criticism, repeat offenders, political content, zero-tolerance.
10 posts5 actionsPartial credit
Hard
Full Queue + Appeals
Appeals, gaming slang vs threats, crisis escalation, repeat offenders, second-appeal denials.
12 posts6 actionsContext bonus
Action Space
approve
Safe, policy-compliant content
warn
Minor first-time violation
remove
Clear policy violation
ban_temporary
Repeat or serious offense
ban_permanent
Zero-tolerance or 5+ violations
escalate
Ambiguous, appeals, or crisis
API Endpoints
GET/healthLiveness check
GET/tasksList all tasks
POST/reset?task_id=task_easyStart episode, get session_id
POST/step?session_id=abc123Submit action, get reward
GET/state?session_id=abc123Full state + score
Reward Design
Non-sparse: every post scores independently (0.0 - 1.0)
Partial credit: one severity level off scores ~0.65 instead of 0.0
Context bonus: +0.3 for history-dependent or context-dependent posts
Violation ID: correctly identifying the violation category earns bonus
Baseline Scores

Meta Llama 3 8B Instruct

temperature=0 | seed=42 | reproducible
Easy
0.500
Medium
0.533
Hard
0.423
Interactive API Docs View Tasks Health Check
"""


# NOTE: endpoint docstrings are published verbatim in the OpenAPI schema, so
# maintainer-facing remarks in this section are written as `#` comments.

# Serves the static landing page at the site root (hidden from the schema).
@app.get("/", response_class=HTMLResponse, include_in_schema=False)
def root():
    return LANDING_HTML


# Serves the custom-themed docs page that replaces FastAPI's default /docs.
@app.get("/docs", include_in_schema=False)
def custom_docs():
    return HTMLResponse(f""" Content Moderation Queue — API Docs
""")


# ═══════════════════════════════════════════════════════════════════════════
# API Endpoints
# ═══════════════════════════════════════════════════════════════════════════
@app.get("/health")
def health():
    """Liveness probe — returns 200 when server is ready."""
    return {"status": "ok", "environment": "content-moderation-queue", "version": "1.0.0"}


@app.get("/tasks")
def list_tasks():
    """List all available tasks with metadata."""
    return {"tasks": _shared_env.list_tasks()}


@app.post("/reset", response_model=Observation)
def reset(
    task_id: str = Query(default="task_easy", description="One of: task_easy, task_medium, task_hard"),
    seed: Optional[int] = Query(default=None, description="Seed for post order. None=random each episode, integer=fixed reproducible order"),
):
    """
    Start a new episode. Creates an **isolated session** for you.

    - **task_id**: Which task to run (task_easy | task_medium | task_hard)
    - **seed**: Optional. Omit for random post order (RL training). Pass integer (e.g. 42) for reproducible order.

    The response includes a **session_id** — copy it and pass it to every `/step` and `/state` call.
    """
    sid = _generate_session_id()
    try:
        env = ContentModerationEnv()
        obs = env.reset(task_id=task_id, seed=seed, session_id=sid)
    except ValueError as e:
        # Rejected before registration: a bad request must not occupy a slot
        # or evict somebody else's live session.
        raise HTTPException(status_code=400, detail=str(e)) from e
    # Register only once the episode is valid. The client learns the id from
    # this response, so it cannot be used before registration completes.
    _register_session(sid, env)
    return obs


@app.post("/step", response_model=StepResult)
def step(
    action: Action,
    session_id: str = Query(..., description="session_id from /reset response"),
):
    """
    Submit a moderation decision for the current post in your session.

    **Action fields:**
    - `action_type`: One of approve / warn / remove / ban_temporary / ban_permanent / escalate
    - `reasoning`: Optional explanation (logged, not graded)
    - `violation_type`: Optional — spam / hate_speech / harassment / misinformation / csam / illegal_services / doxxing / self_harm_risk / none

    Returns the next Observation, reward (0.0-1.0), done flag, and info dict.
    """
    env = _get_session(session_id)  # raises 404 for unknown sessions
    try:
        return env.step(action)
    except RuntimeError as e:
        # The environment's RuntimeError is reported to the client as a 400.
        raise HTTPException(status_code=400, detail=str(e)) from e


@app.get("/state", response_model=EnvironmentState)
def state(
    session_id: str = Query(..., description="session_id from /reset response"),
):
    """
    Return a full snapshot of your session's current state.

    Includes step count, cumulative reward, all decisions, and final_score once done.
    """
    return _get_session(session_id).state()