# Provenance: copied from a Hugging Face file view (uploader: anugrah55; commit 536dda7, verified; 2.48 kB).
# Commit message: "Apply paper-driven improvements: stratified verifier, sandbox hardening,
# coverage bonus, anti-hacking penalties, curriculum metadata"
"""FastAPI server exposing the OpenSleuth environment over HTTP."""
from __future__ import annotations
import logging
from typing import Optional
from fastapi import FastAPI, HTTPException, Query
from opensleuth_env import (
BLACK_BOX_FUNCTIONS,
OpenSleuthEnv,
ProbeAction,
ResetRequest,
StepRequest,
StepResponse,
SubmitAction,
)
# Configure logging at import time: INFO level, with each record formatted as
# "<timestamp> <level> <logger name>: <message>".
logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s")
# Module-level logger for this server.
log = logging.getLogger("opensleuth.server")
# The FastAPI application; the route handlers below register themselves on it
# through the @app.get / @app.post decorators.
app = FastAPI(title="OpenSleuth Env", version="0.3.0")
# One environment instance, created at import time and shared by every handler
# in this module.
env = OpenSleuthEnv()
@app.get("/health")
def health():
    """Report liveness plus the number of episodes the environment holds.

    Returns:
        A dict with a constant ``"status": "ok"`` and ``"episodes_tracked"``,
        the size of the environment's internal episode table.
    """
    # Reads a private attribute of the environment on purpose (lint suppressed).
    tracked = len(env._states)  # noqa: SLF001
    return {"status": "ok", "episodes_tracked": tracked}
@app.get("/functions")
def list_functions(
    difficulty: Optional[str] = Query(
        None,
        description="Optional filter: easy / medium / hard. Used by the trainer for curriculum scheduling.",
    ),
):
    """List the registered black-box functions, optionally filtered by difficulty.

    Args:
        difficulty: When given, only entries whose ``difficulty`` attribute
            equals this value are returned; entries without the attribute are
            treated as having difficulty ``None`` and are therefore excluded.

    Returns:
        ``{"functions": [...]}`` where each item carries the entry's name,
        signature, description, difficulty (or ``None``) and the number of
        edge cases attached to it (0 when absent or empty).
    """

    def _describe(spec):
        # ``difficulty`` and ``edge_cases`` are read defensively because not
        # every registry entry is guaranteed to define them.
        edge_cases = getattr(spec, "edge_cases", None) or []
        return {
            "name": spec.name,
            "signature": spec.signature,
            "description": spec.description,
            "difficulty": getattr(spec, "difficulty", None),
            "edge_case_count": len(edge_cases),
        }

    selected = [
        _describe(spec)
        for spec in BLACK_BOX_FUNCTIONS.values()
        if difficulty is None or getattr(spec, "difficulty", None) == difficulty
    ]
    return {"functions": selected}
@app.post("/reset")
def reset(req: ResetRequest):
    """Start a new episode and return the environment's initial observation.

    Args:
        req: Request body supplying ``target_name``, ``seed`` and ``max_steps``,
            which are forwarded to ``env.reset``.

    Returns:
        Whatever ``env.reset`` returns for the new episode.

    Raises:
        HTTPException: 400, carrying the error text, when ``env.reset``
            raises ``ValueError``.
    """
    try:
        observation = env.reset(
            target_name=req.target_name,
            seed=req.seed,
            max_steps=req.max_steps,
        )
    except ValueError as exc:
        # Surface a rejected request as a client error rather than a 500.
        raise HTTPException(status_code=400, detail=str(exc)) from exc
    else:
        return observation
@app.post("/step", response_model=StepResponse)
def step(req: StepRequest):
    """Apply one action to an existing episode.

    Args:
        req: Request body carrying the ``episode_id`` and the ``action`` to apply.

    Returns:
        The result of ``env.step`` for that episode and action.

    Raises:
        HTTPException: 404, carrying the error text, when ``env.step``
            raises ``KeyError``.
    """
    try:
        outcome = env.step(req.episode_id, req.action)
    except KeyError as missing:
        # A KeyError from the environment is reported as "not found".
        raise HTTPException(status_code=404, detail=str(missing)) from missing
    return outcome
@app.get("/state/{episode_id}")
def get_state(episode_id: str):
    """Return the stored state of an episode.

    Args:
        episode_id: Identifier taken from the URL path.

    Returns:
        The value produced by ``env.get_state`` when it is truthy.

    Raises:
        HTTPException: 404 when ``env.get_state`` returns a falsy value.
    """
    snapshot = env.get_state(episode_id)
    if snapshot:
        return snapshot
    raise HTTPException(status_code=404, detail=f"Unknown episode_id {episode_id!r}")
# Convenience endpoint: performs a reset followed by a single probe step in one
# call, which is occasionally useful for shell-style debugging.
@app.post("/probe_once")
def probe_once(target_name: str, input_repr: str):
    """Start a fresh episode for ``target_name`` and run one probe against it.

    Args:
        target_name: Name of the target to reset the environment with.
        input_repr: Probe input, passed through as ``ProbeAction.input_repr``.

    Returns:
        The result of ``env.step`` for the single probe action.

    Raises:
        HTTPException: 400, carrying the error text, when ``env.reset``
            raises ``ValueError`` — the same translation the ``/reset``
            endpoint applies, so a bad ``target_name`` is reported as a
            client error instead of an unhandled 500.
    """
    try:
        obs = env.reset(target_name=target_name)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    # NOTE(review): every call starts a new episode; whether the environment
    # ever discards it is not visible from this module — confirm if this
    # endpoint is used heavily.
    return env.step(obs.episode_id, ProbeAction(input_repr=input_repr))