Spaces:
Sleeping
Sleeping
| """ | |
| FastAPI server exposing the Content Moderation Queue OpenEnv environment. | |
| Each call to /reset creates an isolated session with its own state. | |
| Pass the returned session_id to /step and /state to avoid interference | |
| between concurrent users or test runs. | |
| """ | |
| import uuid | |
| from typing import Optional, Dict | |
| from fastapi import FastAPI, HTTPException, Query | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from fastapi.responses import HTMLResponse | |
| from environment import ContentModerationEnv | |
| from environment.models import Action, Observation, StepResult, EnvironmentState | |
| # ─────────────────────────────────────────────────────────────────────────── | |
| # Swagger CSS — warm cozy theme | |
| # ─────────────────────────────────────────────────────────────────────────── | |
| SWAGGER_CSS = """ | |
| body { background: #fdf0dc !important; } | |
| .swagger-ui .topbar { | |
| background: linear-gradient(135deg, #f4a833, #e8923a) !important; | |
| border-bottom: 2px solid #e8923a !important; | |
| padding: 10px 0 !important; | |
| } | |
| .swagger-ui .topbar a { color: #fff !important; } | |
| .swagger-ui .topbar .download-url-wrapper .select-label select { | |
| border-color: rgba(255,255,255,0.3) !important; | |
| color: #fff !important; | |
| } | |
| .swagger-ui { color: #5a4530 !important; font-family: 'Inter', system-ui, sans-serif !important; } | |
| .swagger-ui .info .title { color: #3d2b1a !important; font-weight: 700 !important; } | |
| .swagger-ui .info .description p { color: #7a6550 !important; } | |
| .swagger-ui .info .title small.version-stamp { | |
| background: #f4a833 !important; | |
| color: #fff !important; | |
| border: none !important; | |
| border-radius: 12px !important; | |
| } | |
| .swagger-ui .scheme-container { | |
| background: #fbe8c8 !important; | |
| border: 1px solid #f0d4a0 !important; | |
| border-radius: 14px !important; | |
| box-shadow: 0 2px 8px rgba(180,140,80,0.08) !important; | |
| } | |
| /* Operation blocks */ | |
| .swagger-ui .opblock { | |
| border-radius: 14px !important; | |
| box-shadow: 0 2px 10px rgba(180,140,80,0.08) !important; | |
| margin-bottom: 14px !important; | |
| overflow: hidden !important; | |
| } | |
| .swagger-ui .opblock .opblock-summary { border: none !important; } | |
| .swagger-ui .opblock .opblock-summary-method { border-radius: 8px !important; font-weight: 700 !important; } | |
| .swagger-ui .opblock .opblock-summary-description { color: #7a6550 !important; } | |
| .swagger-ui .opblock .opblock-summary-path { color: #3d2b1a !important; } | |
| /* GET blocks β warm teal */ | |
| .swagger-ui .opblock-get { | |
| background: #fef9f0 !important; | |
| border: 1.5px solid #b8d8c8 !important; | |
| } | |
| .swagger-ui .opblock-get .opblock-summary-method { | |
| background: #5baa8a !important; | |
| color: #fff !important; | |
| } | |
| .swagger-ui .opblock-get .opblock-summary { border-color: transparent !important; } | |
| /* POST blocks β warm orange */ | |
| .swagger-ui .opblock-post { | |
| background: #fef6ed !important; | |
| border: 1.5px solid #f0c880 !important; | |
| } | |
| .swagger-ui .opblock-post .opblock-summary-method { | |
| background: #f4a833 !important; | |
| color: #fff !important; | |
| } | |
| .swagger-ui .opblock-post .opblock-summary { border-color: transparent !important; } | |
| /* Body */ | |
| .swagger-ui .opblock-body { background: #fdf5e8 !important; } | |
| .swagger-ui .opblock-body pre { | |
| background: #fef9f0 !important; | |
| color: #5a4530 !important; | |
| border: 1px solid #f0d4a0 !important; | |
| border-radius: 10px !important; | |
| } | |
| .swagger-ui .opblock-description-wrapper p { color: #7a6550 !important; } | |
| /* Tables */ | |
| .swagger-ui table thead tr td, .swagger-ui table thead tr th { | |
| color: #7a6550 !important; | |
| border-color: #f0d4a0 !important; | |
| } | |
| .swagger-ui table tbody tr td { | |
| color: #5a4530 !important; | |
| border-color: #f5e0c0 !important; | |
| } | |
| /* Parameters */ | |
| .swagger-ui .parameter__name { color: #3d2b1a !important; } | |
| .swagger-ui .parameter__type { color: #5baa8a !important; } | |
| .swagger-ui .parameter__name.required::after { color: #e86040 !important; } | |
| .swagger-ui .parameters-col_description p { color: #7a6550 !important; } | |
| /* Inputs */ | |
| .swagger-ui input[type=text], .swagger-ui textarea, .swagger-ui select { | |
| background: #fef9f0 !important; | |
| color: #3d2b1a !important; | |
| border: 1.5px solid #f0d4a0 !important; | |
| border-radius: 10px !important; | |
| font-family: 'JetBrains Mono', monospace !important; | |
| } | |
| .swagger-ui input[type=text]:focus, .swagger-ui textarea:focus { | |
| border-color: #f4a833 !important; | |
| box-shadow: 0 0 0 3px rgba(244,168,51,0.15) !important; | |
| } | |
| /* Execute button */ | |
| .swagger-ui .btn.execute { | |
| background: linear-gradient(135deg, #f4a833, #e8923a) !important; | |
| color: #fff !important; | |
| border: none !important; | |
| border-radius: 10px !important; | |
| box-shadow: 0 3px 12px rgba(244,168,51,0.3) !important; | |
| font-weight: 600 !important; | |
| padding: 8px 24px !important; | |
| } | |
| .swagger-ui .btn.execute:hover { | |
| box-shadow: 0 5px 20px rgba(244,168,51,0.4) !important; | |
| transform: translateY(-1px); | |
| } | |
| /* Try-out button */ | |
| .swagger-ui .try-out__btn { | |
| color: #f4a833 !important; | |
| border-color: #f0c880 !important; | |
| border-radius: 10px !important; | |
| } | |
| .swagger-ui .try-out__btn:hover { background: rgba(244,168,51,0.08) !important; } | |
| /* Cancel */ | |
| .swagger-ui .btn-group .cancel { color: #7a6550 !important; border-color: #e0c8a0 !important; } | |
| /* Responses */ | |
| .swagger-ui .responses-inner { background: transparent !important; } | |
| .swagger-ui .response-col_status { color: #5baa8a !important; font-weight: 600 !important; } | |
| .swagger-ui .response-col_description { color: #7a6550 !important; } | |
| /* Live response */ | |
| .swagger-ui .microlight { | |
| background: #fef9f0 !important; | |
| color: #5a4530 !important; | |
| border-radius: 10px !important; | |
| border: 1px solid #f0d4a0 !important; | |
| } | |
| /* Models */ | |
| .swagger-ui section.models { | |
| border: 1.5px solid #f0d4a0 !important; | |
| border-radius: 14px !important; | |
| background: #fef6ed !important; | |
| } | |
| .swagger-ui section.models h4 { color: #3d2b1a !important; border-color: #f0d4a0 !important; } | |
| .swagger-ui .model-title { color: #3d2b1a !important; } | |
| .swagger-ui .model { color: #5a4530 !important; } | |
| .swagger-ui .model .property { color: #7a6550 !important; } | |
| .swagger-ui .model .property.primitive { color: #5baa8a !important; } | |
| .swagger-ui .prop-type { color: #c47830 !important; } | |
| .swagger-ui .model-box { background: #fdf5e8 !important; border-radius: 10px !important; } | |
| .swagger-ui section.models .model-container { | |
| background: #fdf5e8 !important; | |
| border-radius: 10px !important; | |
| margin: 4px 0 !important; | |
| } | |
| /* Links */ | |
| .swagger-ui a { color: #e08030 !important; } | |
| .swagger-ui a:hover { color: #c06020 !important; } | |
| /* Section tags */ | |
| .swagger-ui .opblock-tag { color: #3d2b1a !important; border-color: #f0d4a0 !important; } | |
| /* Expand arrows */ | |
| .swagger-ui .expand-operation svg, .swagger-ui .expand-methods svg { fill: #c4a070 !important; } | |
| /* Markdown */ | |
| .swagger-ui .markdown p, .swagger-ui .renderedMarkdown p { color: #7a6550 !important; } | |
| .swagger-ui .markdown li, .swagger-ui .renderedMarkdown li { color: #7a6550 !important; } | |
| .swagger-ui .markdown code { | |
| background: #fbe8c8 !important; | |
| color: #c47830 !important; | |
| border-radius: 6px !important; | |
| padding: 1px 6px !important; | |
| } | |
| /* Scrollbar */ | |
| ::-webkit-scrollbar { width: 6px; } | |
| ::-webkit-scrollbar-track { background: #fdf0dc; } | |
| ::-webkit-scrollbar-thumb { background: #e0c8a0; border-radius: 3px; } | |
| """ | |
| # ─────────────────────────────────────────────────────────────────────────── | |
| # App setup | |
| # ─────────────────────────────────────────────────────────────────────────── | |
# The built-in Swagger page is switched off (docs_url=None); this module
# defines custom_docs() to render a themed replacement instead.
app = FastAPI(
    title="Content Moderation Queue — OpenEnv",
    description=(
        "A real-world content moderation environment where AI agents learn "
        "to triage social media posts using a tiered policy framework. "
        "Implements the full OpenEnv spec: step() / reset() / state(). "
        "Each /reset call creates an isolated session — pass session_id to /step and /state."
    ),
    version="1.0.0",
    docs_url=None,
)

# CORS is fully open: any origin, HTTP method, and request header is accepted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# Live environments keyed by session id. Dicts keep insertion order, which
# _new_session() uses to pick the oldest entry when the cap is reached.
_sessions: Dict[str, ContentModerationEnv] = {}
# Maximum number of sessions kept in _sessions before the oldest is evicted.
MAX_SESSIONS = 200
# Session-independent environment instance; list_tasks() queries it for
# task metadata.
_shared_env = ContentModerationEnv()
def _get_session(session_id: str) -> ContentModerationEnv:
    """Return the environment registered under *session_id*.

    Raises:
        HTTPException: 404 when no session with that id is stored.
    """
    env = _sessions.get(session_id)
    if env is None:
        message = f"Session '{session_id}' not found. Call POST /reset first to create a session."
        raise HTTPException(status_code=404, detail=message)
    return env
def _new_session() -> tuple[str, ContentModerationEnv]:
    """Create a fresh environment, register it in ``_sessions``, and return it.

    Returns:
        A ``(session_id, env)`` pair. The id is 8 lowercase hex characters.

    When the store already holds ``MAX_SESSIONS`` entries, the oldest-created
    sessions are dropped first to make room.
    """
    # Ids are only 8 hex characters, so redraw on a clash: reusing an id
    # would silently replace another caller's live session.
    sid = uuid.uuid4().hex[:8]
    while sid in _sessions:
        sid = uuid.uuid4().hex[:8]

    # Dicts iterate in insertion order, so the first key is the oldest
    # session. The emptiness guard keeps a non-positive cap from raising
    # StopIteration on an empty store.
    while _sessions and len(_sessions) >= MAX_SESSIONS:
        oldest = next(iter(_sessions))
        _sessions.pop(oldest, None)

    env = ContentModerationEnv()
    _sessions[sid] = env
    return sid, env
| # ─────────────────────────────────────────────────────────────────────────── | |
| # Landing page — warm cozy theme | |
| # ─────────────────────────────────────────────────────────────────────────── | |
| LANDING_HTML = """ | |
| <!DOCTYPE html> | |
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> | |
| <title>Content Moderation Queue — OpenEnv</title> | |
| <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&display=swap" rel="stylesheet"> | |
| <style> | |
| :root { | |
| --cream: #fdf0dc; | |
| --cream2: #fbe8c8; | |
| --card: #fef6ed; | |
| --card2: #fef9f0; | |
| --border: #f0d4a0; | |
| --border2: #e8c890; | |
| --text: #3d2b1a; | |
| --text2: #5a4530; | |
| --muted: #9a8060; | |
| --orange: #f4a833; | |
| --orange2: #e8923a; | |
| --peach: #f8c06a; | |
| --teal: #5baa8a; | |
| --red-soft: #e86040; | |
| --rose: #e88070; | |
| --sand: #d4b888; | |
| } | |
| * { margin: 0; padding: 0; box-sizing: border-box; } | |
| body { | |
| font-family: 'Inter', system-ui, sans-serif; | |
| background: var(--cream); | |
| color: var(--text2); | |
| min-height: 100vh; | |
| background-image: | |
| radial-gradient(ellipse at 30% 0%, rgba(244,168,51,0.08) 0%, transparent 50%), | |
| radial-gradient(ellipse at 80% 100%, rgba(232,146,58,0.06) 0%, transparent 50%); | |
| } | |
| /* βββ HERO βββ */ | |
| .hero { | |
| background: linear-gradient(160deg, #fbe8c8 0%, #f8d8a4 40%, #f4c87a 100%); | |
| padding: 52px 24px 44px; | |
| text-align: center; | |
| position: relative; | |
| overflow: hidden; | |
| border-bottom: 2px solid var(--border); | |
| } | |
| .hero::before { | |
| content: ''; | |
| position: absolute; | |
| bottom: -2px; left: 0; right: 0; | |
| height: 40px; | |
| background: url("data:image/svg+xml,%3Csvg viewBox='0 0 1200 40' xmlns='http://www.w3.org/2000/svg'%3E%3Cpath d='M0,20 Q150,0 300,20 Q450,40 600,20 Q750,0 900,20 Q1050,40 1200,20 V40 H0 Z' fill='%23fdf0dc'/%3E%3C/svg%3E") no-repeat center; | |
| background-size: cover; | |
| } | |
| .hero-icon { | |
| width: 72px; height: 72px; | |
| background: rgba(255,255,255,0.6); | |
| border: 2px solid rgba(255,255,255,0.8); | |
| border-radius: 20px; | |
| display: inline-flex; | |
| align-items: center; | |
| justify-content: center; | |
| font-size: 2.2rem; | |
| margin-bottom: 16px; | |
| box-shadow: 0 4px 16px rgba(200,150,60,0.15); | |
| } | |
| .hero h1 { | |
| font-size: 2rem; | |
| font-weight: 800; | |
| color: var(--text); | |
| letter-spacing: -0.5px; | |
| margin-bottom: 10px; | |
| } | |
| .badges { display: flex; gap: 8px; justify-content: center; margin-bottom: 16px; } | |
| .badge { | |
| padding: 4px 14px; | |
| border-radius: 20px; | |
| font-size: 0.68rem; | |
| font-weight: 700; | |
| text-transform: uppercase; | |
| letter-spacing: 0.8px; | |
| } | |
| .b-env { background: var(--orange); color: #fff; } | |
| .b-live { background: var(--teal); color: #fff; } | |
| .b-ver { background: rgba(255,255,255,0.6); color: var(--muted); border: 1px solid var(--border); } | |
| .hero-desc { | |
| max-width: 520px; | |
| margin: 0 auto; | |
| color: var(--text2); | |
| font-size: 0.92rem; | |
| line-height: 1.65; | |
| opacity: 0.85; | |
| } | |
| .container { max-width: 900px; margin: 0 auto; padding: 32px 24px; } | |
| /* βββ CARD BASE βββ */ | |
| .card { | |
| background: var(--card); | |
| border: 1.5px solid var(--border); | |
| border-radius: 16px; | |
| box-shadow: 0 2px 10px rgba(180,140,80,0.06); | |
| } | |
| /* βββ STATS βββ */ | |
| .stats { display: grid; grid-template-columns: repeat(4, 1fr); gap: 14px; margin-bottom: 32px; } | |
| .stat { padding: 20px 14px; text-align: center; } | |
| .stat-val { | |
| font-size: 1.8rem; | |
| font-weight: 800; | |
| color: var(--orange2); | |
| } | |
| .stat-lbl { | |
| font-size: 0.7rem; | |
| color: var(--muted); | |
| text-transform: uppercase; | |
| letter-spacing: 0.8px; | |
| margin-top: 4px; | |
| font-weight: 500; | |
| } | |
| /* βββ SECTION TITLE βββ */ | |
| .stitle { | |
| font-size: 0.72rem; | |
| font-weight: 700; | |
| color: var(--muted); | |
| text-transform: uppercase; | |
| letter-spacing: 1.5px; | |
| margin-bottom: 14px; | |
| display: flex; | |
| align-items: center; | |
| gap: 12px; | |
| } | |
| .stitle::after { | |
| content: ''; | |
| flex: 1; | |
| height: 1.5px; | |
| background: linear-gradient(90deg, var(--border), transparent); | |
| } | |
| /* βββ HOW IT WORKS βββ */ | |
| .flow { display: grid; grid-template-columns: repeat(4, 1fr); gap: 12px; margin-bottom: 32px; } | |
| .flow-card { | |
| padding: 20px 14px; | |
| text-align: center; | |
| position: relative; | |
| overflow: hidden; | |
| } | |
| .flow-card::before { | |
| content: ''; | |
| position: absolute; | |
| top: 0; left: 0; right: 0; | |
| height: 3px; | |
| background: linear-gradient(90deg, var(--orange), var(--peach)); | |
| border-radius: 16px 16px 0 0; | |
| } | |
| .flow-n { | |
| display: inline-flex; | |
| width: 32px; height: 32px; | |
| align-items: center; | |
| justify-content: center; | |
| background: var(--orange); | |
| color: #fff; | |
| border-radius: 10px; | |
| font-size: 0.85rem; | |
| font-weight: 700; | |
| } | |
| .flow-t { font-size: 0.88rem; font-weight: 600; color: var(--text); margin-top: 8px; } | |
| .flow-d { font-size: 0.75rem; color: var(--muted); margin-top: 4px; } | |
| .flow-c { | |
| display: inline-block; | |
| margin-top: 8px; | |
| font-family: 'JetBrains Mono', monospace; | |
| font-size: 0.67rem; | |
| color: var(--teal); | |
| background: var(--cream2); | |
| padding: 3px 10px; | |
| border-radius: 8px; | |
| border: 1px solid var(--border); | |
| font-weight: 500; | |
| } | |
| /* βββ TASKS βββ */ | |
| .tasks { display: grid; grid-template-columns: repeat(3, 1fr); gap: 14px; margin-bottom: 32px; } | |
| .task { padding: 22px 18px; position: relative; overflow: hidden; } | |
| .task::before { | |
| content: ''; | |
| position: absolute; | |
| left: 0; top: 0; bottom: 0; | |
| width: 4px; | |
| border-radius: 16px 0 0 16px; | |
| } | |
| .t-easy::before { background: var(--teal); } | |
| .t-med::before { background: var(--orange); } | |
| .t-hard::before { background: var(--red-soft); } | |
| .task-diff { | |
| font-size: 0.62rem; | |
| font-weight: 700; | |
| text-transform: uppercase; | |
| letter-spacing: 1.2px; | |
| margin-bottom: 6px; | |
| } | |
| .t-easy .task-diff { color: var(--teal); } | |
| .t-med .task-diff { color: var(--orange2); } | |
| .t-hard .task-diff { color: var(--red-soft); } | |
| .task-name { font-size: 0.92rem; font-weight: 700; color: var(--text); margin-bottom: 6px; } | |
| .task-desc { font-size: 0.78rem; color: var(--muted); line-height: 1.5; margin-bottom: 12px; } | |
| .chips { display: flex; gap: 6px; flex-wrap: wrap; } | |
| .chip { | |
| font-size: 0.65rem; | |
| font-weight: 500; | |
| padding: 3px 10px; | |
| border-radius: 8px; | |
| background: var(--cream2); | |
| border: 1px solid var(--border); | |
| color: var(--muted); | |
| } | |
| /* βββ ACTIONS βββ */ | |
| .actions { display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px; margin-bottom: 32px; } | |
| .act { padding: 14px 16px; transition: transform 0.2s; } | |
| .act:hover { transform: translateY(-2px); } | |
| .act-name { | |
| font-family: 'JetBrains Mono', monospace; | |
| font-size: 0.82rem; | |
| font-weight: 600; | |
| margin-bottom: 3px; | |
| } | |
| .act-desc { font-size: 0.72rem; color: var(--muted); } | |
| .a-approve .act-name { color: var(--teal); } | |
| .a-warn .act-name { color: var(--orange); } | |
| .a-remove .act-name { color: var(--orange2); } | |
| .a-bant .act-name { color: var(--red-soft); } | |
| .a-banp .act-name { color: #c43030; } | |
| .a-esc .act-name { color: #9070b0; } | |
| /* βββ ENDPOINTS βββ */ | |
| .ep-table { margin-bottom: 32px; overflow: hidden; } | |
| .ep { | |
| display: flex; | |
| align-items: center; | |
| padding: 12px 18px; | |
| gap: 14px; | |
| border-bottom: 1px solid rgba(240,212,160,0.5); | |
| } | |
| .ep:last-child { border-bottom: none; } | |
| .ep-m { | |
| font-size: 0.64rem; | |
| font-weight: 700; | |
| padding: 4px 10px; | |
| border-radius: 8px; | |
| text-transform: uppercase; | |
| letter-spacing: 0.5px; | |
| min-width: 48px; | |
| text-align: center; | |
| } | |
| .ep-g { background: rgba(91,170,138,0.12); color: var(--teal); border: 1px solid rgba(91,170,138,0.25); } | |
| .ep-p { background: rgba(244,168,51,0.12); color: var(--orange2); border: 1px solid rgba(244,168,51,0.25); } | |
| .ep-path { | |
| font-family: 'JetBrains Mono', monospace; | |
| font-size: 0.8rem; | |
| color: var(--text); | |
| flex: 1; | |
| font-weight: 500; | |
| } | |
| .ep-info { font-size: 0.78rem; color: var(--muted); } | |
| /* βββ REWARD βββ */ | |
| .reward { padding: 22px 20px; margin-bottom: 32px; } | |
| .reward h3 { font-size: 0.95rem; font-weight: 700; color: var(--text); margin-bottom: 14px; } | |
| .r-item { display: flex; align-items: center; gap: 10px; margin-bottom: 10px; font-size: 0.82rem; color: var(--text2); } | |
| .r-dot { width: 10px; height: 10px; border-radius: 50%; flex-shrink: 0; } | |
| .rg { background: var(--teal); } | |
| .ry { background: var(--orange); } | |
| .rb { background: #5a9ac0; } | |
| .rr { background: var(--rose); } | |
| /* βββ BASELINE βββ */ | |
| .baseline { padding: 22px 20px; margin-bottom: 32px; } | |
| .baseline h3 { font-size: 0.92rem; font-weight: 700; color: var(--text); margin-bottom: 6px; } | |
| .bl-sub { font-size: 0.73rem; color: var(--muted); margin-bottom: 16px; } | |
| .sc-row { display: flex; align-items: center; gap: 12px; margin-bottom: 10px; } | |
| .sc-lbl { font-size: 0.78rem; width: 72px; font-weight: 600; } | |
| .sc-bg { | |
| flex: 1; height: 24px; border-radius: 12px; | |
| background: var(--cream2); | |
| border: 1px solid var(--border); | |
| overflow: hidden; | |
| } | |
| .sc-bar { height: 100%; border-radius: 11px; } | |
| .sc-bar-g { background: linear-gradient(90deg, #4a9a7a, var(--teal)); } | |
| .sc-bar-y { background: linear-gradient(90deg, #d4922a, var(--orange)); } | |
| .sc-bar-r { background: linear-gradient(90deg, #c84830, var(--red-soft)); } | |
| .sc-val { font-size: 0.82rem; font-weight: 700; width: 48px; text-align: right; color: var(--text); } | |
| /* βββ CTA βββ */ | |
| .ctas { display: flex; gap: 12px; margin-bottom: 36px; } | |
| .cta { | |
| flex: 1; | |
| display: block; | |
| text-align: center; | |
| padding: 14px 20px; | |
| border-radius: 12px; | |
| text-decoration: none; | |
| font-weight: 600; | |
| font-size: 0.88rem; | |
| transition: all 0.25s ease; | |
| } | |
| .cta:hover { transform: translateY(-2px); } | |
| .cta-1 { | |
| background: linear-gradient(135deg, var(--orange), var(--orange2)); | |
| color: #fff; | |
| box-shadow: 0 4px 16px rgba(244,168,51,0.25); | |
| } | |
| .cta-1:hover { box-shadow: 0 6px 24px rgba(244,168,51,0.35); } | |
| .cta-2 { | |
| background: var(--card); | |
| color: var(--teal); | |
| border: 1.5px solid rgba(91,170,138,0.3); | |
| } | |
| .cta-2:hover { background: rgba(91,170,138,0.06); } | |
| .cta-3 { | |
| background: var(--card); | |
| color: var(--text2); | |
| border: 1.5px solid var(--border); | |
| } | |
| /* βββ FOOTER βββ */ | |
| .footer { | |
| text-align: center; | |
| padding: 28px 0; | |
| border-top: 1.5px solid var(--border); | |
| color: var(--muted); | |
| font-size: 0.75rem; | |
| } | |
| .footer a { color: var(--orange2); text-decoration: none; } | |
| .footer a:hover { text-decoration: underline; } | |
| /* βββ DECORATIVE BLOBS βββ */ | |
| .blob { | |
| position: absolute; | |
| border-radius: 50%; | |
| opacity: 0.12; | |
| pointer-events: none; | |
| } | |
| .blob-1 { width: 200px; height: 200px; background: var(--orange); top: -60px; right: -40px; } | |
| .blob-2 { width: 140px; height: 140px; background: var(--peach); bottom: -30px; left: -30px; } | |
| @media (max-width: 720px) { | |
| .stats, .flow { grid-template-columns: repeat(2, 1fr); } | |
| .tasks, .actions { grid-template-columns: 1fr; } | |
| .ctas { flex-direction: column; } | |
| } | |
| </style> | |
| </head> | |
| <body> | |
| <div class="hero"> | |
| <div class="blob blob-1"></div> | |
| <div class="blob blob-2"></div> | |
| <div class="hero-icon">🛡️</div> | |
| <h1>Content Moderation Queue</h1> | |
| <div class="badges"> | |
| <span class="badge b-env">OpenEnv</span> | |
| <span class="badge b-live">Live</span> | |
| <span class="badge b-ver">v1.0.0</span> | |
| </div> | |
| <p class="hero-desc">A real-world RL environment simulating Trust & Safety moderation. | |
| AI agents triage social media posts, handle appeals, detect crisis content, | |
| and apply graduated policy enforcement.</p> | |
| </div> | |
| <div class="container"> | |
| <div class="stats"> | |
| <div class="card stat"><div class="stat-val">30</div><div class="stat-lbl">Labeled Posts</div></div> | |
| <div class="card stat"><div class="stat-val">3</div><div class="stat-lbl">Difficulty Levels</div></div> | |
| <div class="card stat"><div class="stat-val">6</div><div class="stat-lbl">Action Types</div></div> | |
| <div class="card stat"><div class="stat-val">9</div><div class="stat-lbl">Violation Types</div></div> | |
| </div> | |
| <div class="stitle">How It Works</div> | |
| <div class="flow"> | |
| <div class="card flow-card"><div class="flow-n">1</div><div class="flow-t">Reset</div><div class="flow-d">Start episode, pick difficulty</div><div class="flow-c">POST /reset</div></div> | |
| <div class="card flow-card"><div class="flow-n">2</div><div class="flow-t">Observe</div><div class="flow-d">Read post, history, context</div><div class="flow-c">session_id</div></div> | |
| <div class="card flow-card"><div class="flow-n">3</div><div class="flow-t">Decide</div><div class="flow-d">Choose action + violation</div><div class="flow-c">POST /step</div></div> | |
| <div class="card flow-card"><div class="flow-n">4</div><div class="flow-t">Score</div><div class="flow-d">Get reward 0.0 - 1.0</div><div class="flow-c">GET /state</div></div> | |
| </div> | |
| <div class="stitle">Tasks</div> | |
| <div class="tasks"> | |
| <div class="card task t-easy"> | |
| <div class="task-diff">Easy</div> | |
| <div class="task-name">Binary Content Moderation</div> | |
| <div class="task-desc">Classify 8 posts as safe or harmful. Clear signals: spam, hate speech vs everyday content.</div> | |
| <div class="chips"><span class="chip">8 posts</span><span class="chip">2 actions</span><span class="chip">Binary</span></div> | |
| </div> | |
| <div class="card task t-med"> | |
| <div class="task-diff">Medium</div> | |
| <div class="task-name">Tiered Policy Enforcement</div> | |
| <div class="task-desc">Apply 5-level severity. Harsh criticism, repeat offenders, political content, zero-tolerance.</div> | |
| <div class="chips"><span class="chip">10 posts</span><span class="chip">5 actions</span><span class="chip">Partial credit</span></div> | |
| </div> | |
| <div class="card task t-hard"> | |
| <div class="task-diff">Hard</div> | |
| <div class="task-name">Full Queue + Appeals</div> | |
| <div class="task-desc">Appeals, gaming slang vs threats, crisis escalation, repeat offenders, second-appeal denials.</div> | |
| <div class="chips"><span class="chip">12 posts</span><span class="chip">6 actions</span><span class="chip">Context bonus</span></div> | |
| </div> | |
| </div> | |
| <div class="stitle">Action Space</div> | |
| <div class="actions"> | |
| <div class="card act a-approve"><div class="act-name">approve</div><div class="act-desc">Safe, policy-compliant content</div></div> | |
| <div class="card act a-warn"><div class="act-name">warn</div><div class="act-desc">Minor first-time violation</div></div> | |
| <div class="card act a-remove"><div class="act-name">remove</div><div class="act-desc">Clear policy violation</div></div> | |
| <div class="card act a-bant"><div class="act-name">ban_temporary</div><div class="act-desc">Repeat or serious offense</div></div> | |
| <div class="card act a-banp"><div class="act-name">ban_permanent</div><div class="act-desc">Zero-tolerance or 5+ violations</div></div> | |
| <div class="card act a-esc"><div class="act-name">escalate</div><div class="act-desc">Ambiguous, appeals, or crisis</div></div> | |
| </div> | |
| <div class="stitle">API Endpoints</div> | |
| <div class="card ep-table"> | |
| <div class="ep"><span class="ep-m ep-g">GET</span><span class="ep-path">/health</span><span class="ep-info">Liveness check</span></div> | |
| <div class="ep"><span class="ep-m ep-g">GET</span><span class="ep-path">/tasks</span><span class="ep-info">List all tasks</span></div> | |
| <div class="ep"><span class="ep-m ep-p">POST</span><span class="ep-path">/reset?task_id=task_easy</span><span class="ep-info">Start episode, get session_id</span></div> | |
| <div class="ep"><span class="ep-m ep-p">POST</span><span class="ep-path">/step?session_id=abc123</span><span class="ep-info">Submit action, get reward</span></div> | |
| <div class="ep"><span class="ep-m ep-g">GET</span><span class="ep-path">/state?session_id=abc123</span><span class="ep-info">Full state + score</span></div> | |
| </div> | |
| <div class="stitle">Reward Design</div> | |
| <div class="card reward"> | |
| <div class="r-item"><div class="r-dot rg"></div><div><strong>Non-sparse:</strong> every post scores independently (0.0 - 1.0)</div></div> | |
| <div class="r-item"><div class="r-dot ry"></div><div><strong>Partial credit:</strong> one severity level off scores ~0.65 instead of 0.0</div></div> | |
| <div class="r-item"><div class="r-dot rb"></div><div><strong>Context bonus:</strong> +0.3 for history-dependent or context-dependent posts</div></div> | |
| <div class="r-item"><div class="r-dot rr"></div><div><strong>Violation ID:</strong> correctly identifying the violation category earns bonus</div></div> | |
| </div> | |
| <div class="stitle">Baseline Scores</div> | |
| <div class="card baseline"> | |
| <h3>Meta Llama 3 8B Instruct</h3> | |
| <div class="bl-sub">temperature=0 | seed=42 | reproducible</div> | |
| <div class="sc-row"><span class="sc-lbl" style="color:var(--teal)">Easy</span><div class="sc-bg"><div class="sc-bar sc-bar-g" style="width:50%"></div></div><span class="sc-val">0.500</span></div> | |
| <div class="sc-row"><span class="sc-lbl" style="color:var(--orange2)">Medium</span><div class="sc-bg"><div class="sc-bar sc-bar-y" style="width:53%"></div></div><span class="sc-val">0.533</span></div> | |
| <div class="sc-row"><span class="sc-lbl" style="color:var(--red-soft)">Hard</span><div class="sc-bg"><div class="sc-bar sc-bar-r" style="width:42%"></div></div><span class="sc-val">0.423</span></div> | |
| </div> | |
| <div class="ctas"> | |
| <a class="cta cta-1" href="/docs">Interactive API Docs</a> | |
| <a class="cta cta-2" href="/tasks">View Tasks</a> | |
| <a class="cta cta-3" href="/health">Health Check</a> | |
| </div> | |
| <div class="footer"> | |
| Content Moderation Queue — OpenEnv v1.0.0<br> | |
| Built for the Meta AI Hackathon | <a href="/docs">API Docs</a> | |
| </div> | |
| </div> | |
| </body> | |
| </html> | |
| """ | |
# Registered explicitly as an HTML route: a bare ``str`` return would
# otherwise be serialized as a JSON string. Hidden from the OpenAPI schema
# because it is a human-facing page, not part of the API.
@app.get("/", response_class=HTMLResponse, include_in_schema=False)
def root():
    """Serve the static landing page."""
    return LANDING_HTML
# The app is created with docs_url=None, so nothing serves /docs unless this
# handler is registered for it (the landing page links to /docs).
@app.get("/docs", include_in_schema=False)
def custom_docs():
    """Serve Swagger UI themed with ``SWAGGER_CSS``.

    The page loads the Swagger UI bundle from the jsDelivr CDN and points it
    at the app's ``/openapi.json`` schema.
    """
    # Doubled braces are literal braces in the f-string; SWAGGER_CSS is the
    # only interpolated value.
    return HTMLResponse(f"""
<!DOCTYPE html>
<html><head>
<title>Content Moderation Queue — API Docs</title>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1">
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/swagger-ui-dist@5/swagger-ui.css">
<style>{SWAGGER_CSS}</style>
</head><body>
<div id="swagger-ui"></div>
<script src="https://cdn.jsdelivr.net/npm/swagger-ui-dist@5/swagger-ui-bundle.js"></script>
<script>
SwaggerUIBundle({{
url: '/openapi.json',
dom_id: '#swagger-ui',
presets: [SwaggerUIBundle.presets.apis, SwaggerUIBundle.SwaggerUIStandalonePreset],
layout: "BaseLayout",
defaultModelsExpandDepth: 1,
docExpansion: "list",
}})
</script>
</body></html>
""")
| # ─────────────────────────────────────────────────────────────────────────── | |
| # API Endpoints | |
| # ─────────────────────────────────────────────────────────────────────────── | |
@app.get("/health")
def health():
    """Liveness probe — returns 200 when server is ready."""
    return {"status": "ok", "environment": "content-moderation-queue", "version": "1.0.0"}
@app.get("/tasks")
def list_tasks():
    """List all available tasks with metadata."""
    # Task metadata comes from the shared environment, so no session is needed.
    return {"tasks": _shared_env.list_tasks()}
# NOTE(review): Observation is imported by this module but deliberately not
# wired in as response_model here — confirm what env.reset() returns first.
@app.post("/reset")
def reset(
    task_id: str = Query(default="task_easy", description="One of: task_easy, task_medium, task_hard"),
    seed: Optional[int] = Query(default=None, description="Seed for post order. None=random each episode, integer=fixed reproducible order"),
):
    """
    Start a new episode. Creates an **isolated session** for you.

    - **task_id**: Which task to run (task_easy | task_medium | task_hard)
    - **seed**: Optional. Omit for random post order (RL training). Pass integer (e.g. 42) for reproducible order.

    The response includes a **session_id** — copy it and pass it to every `/step` and `/state` call.
    """
    sid = None
    try:
        sid, env = _new_session()
        return env.reset(task_id=task_id, seed=seed, session_id=sid)
    except ValueError as e:
        # The session is registered before the environment validates its
        # arguments; drop it on rejection so failed resets do not occupy
        # slots in the bounded session store.
        if sid is not None:
            _sessions.pop(sid, None)
        raise HTTPException(status_code=400, detail=str(e)) from e
@app.post("/step")
def step(
    action: Action,
    session_id: str = Query(..., description="session_id from /reset response"),
):
    """
    Submit a moderation decision for the current post in your session.

    **Action fields:**
    - `action_type`: One of approve / warn / remove / ban_temporary / ban_permanent / escalate
    - `reasoning`: Optional explanation (logged, not graded)
    - `violation_type`: Optional — spam / hate_speech / harassment / misinformation / csam / illegal_services / doxxing / self_harm_risk / none

    Returns the next Observation, reward (0.0-1.0), done flag, and info dict.
    """
    # Unknown session ids surface as a 404 from _get_session.
    env = _get_session(session_id)
    try:
        return env.step(action)
    except RuntimeError as e:
        # A RuntimeError from the environment is reported as a client error.
        raise HTTPException(status_code=400, detail=str(e)) from e
@app.get("/state")
def state(
    session_id: str = Query(..., description="session_id from /reset response"),
):
    """
    Return a full snapshot of your session's current state.

    Includes step count, cumulative reward, all decisions, and final_score once done.
    """
    # Unknown session ids surface as a 404 from _get_session.
    env = _get_session(session_id)
    return env.state()