OGrohit's picture
Day 2: environment.py, log_generator.py, single_crash scenario, real endpoints
5884d9c
raw
history blame
4.07 kB
from typing import Optional

import uvicorn
from fastapi import FastAPI, Query
from fastapi.responses import JSONResponse

from server.environment import LogTriageEnvironment
from server.models import TriageAction
# ASGI application object; the metadata below is what FastAPI exposes in its
# generated API documentation.
app = FastAPI(
    title="LogTriageEnv",
    description="OpenEnv environment for SRE incident triage",
    version="1.0.0",
)

# A single module-level environment is created at import time and shared by
# every endpoint handler in this server process.
env = LogTriageEnvironment()
# GET /health: liveness probe. Always answers with a fixed payload naming the
# environment and its version; it does not touch the environment instance.
@app.get("/health")
def health():
    payload = {"status": "ok", "environment": "logtriage-env", "version": "1.0.0"}
    return payload
# POST /reset: start a new episode on the shared environment.
#
# Query parameters:
#   task: task ID to run; defaults to "single_crash".
#   seed: optional random seed. When omitted, None is forwarded to env.reset().
#         The annotation is Optional[int] (not plain int) so the declared type
#         and the generated API schema agree with the None default.
#
# Returns the observation produced by env.reset(), dumped to a plain dict.
# A ValueError raised inside the try block is reported as HTTP 400 with the
# exception text under "error".
@app.post("/reset")
def reset(
    task: str = Query(default="single_crash", description="Task ID to run"),
    seed: Optional[int] = Query(default=None, description="Random seed for reproducibility"),
):
    try:
        observation = env.reset(task_id=task, seed=seed)
        return observation.model_dump()
    except ValueError as exc:
        return JSONResponse(status_code=400, content={"error": str(exc)})
# POST /step: apply one triage action to the shared environment.
#
# The action is first checked with its own is_valid(), which yields a
# (valid, error) pair. An invalid action is answered with HTTP 422 and never
# reaches the environment. A RuntimeError raised while stepping or dumping the
# observation is answered with HTTP 400.
@app.post("/step")
def step(action: TriageAction):
    is_ok, problem = action.is_valid()
    if is_ok:
        try:
            observation = env.step(action)
            return observation.model_dump()
        except RuntimeError as exc:
            return JSONResponse(status_code=400, content={"error": str(exc)})
    return JSONResponse(status_code=422, content={"error": problem})
# GET /state: return the environment's current state dumped to a plain dict.
# A RuntimeError raised while reading or dumping the state is answered with
# HTTP 400 and the exception text under "error".
@app.get("/state")
def state():
    try:
        current = env.state
        return current.model_dump()
    except RuntimeError as exc:
        return JSONResponse(status_code=400, content={"error": str(exc)})
# Action payload schema advertised for every task. It was previously repeated
# verbatim inside each task entry; it is now defined once and copied per task.
# The em dash is written as an escape (\u2014) so it cannot be mangled by an
# editor or viewer that mis-detects the file encoding.
_ACTION_SCHEMA = {
    "action_type": "classify_severity | identify_root_cause | escalate | remediate | request_more_logs | resolve | ignore",
    "value": "string (depends on action_type \u2014 see README)",
    "confidence": "float [0.0, 1.0]",
    "reasoning": "string (optional)",
}

# Static task catalogue: (id, name, difficulty, max_steps, description).
_TASK_CATALOG = (
    (
        "single_crash",
        "Single Service Crash",
        "easy",
        8,
        "One service crashes. Classify severity, find root cause, remediate.",
    ),
    (
        "cascading_failure",
        "Cascading Failure",
        "medium",
        12,
        "DB slowdown cascades upstream. Find the true root cause, not symptoms.",
    ),
    (
        "silent_degradation",
        "Silent Degradation with Noise",
        "hard",
        15,
        "Slow degradation hidden in 60% noise. Nuanced P2 severity judgment.",
    ),
)


# GET /tasks: list the available tasks together with the action schema.
# Returns {"tasks": [...]}, one entry per catalogue row, with keys id, name,
# difficulty, max_steps, description and action_schema (in that order).
@app.get("/tasks")
def get_tasks():
    return {
        "tasks": [
            {
                "id": task_id,
                "name": name,
                "difficulty": difficulty,
                "max_steps": max_steps,
                "description": description,
                # Fresh copy per task so entries never share one mutable dict.
                "action_schema": dict(_ACTION_SCHEMA),
            }
            for task_id, name, difficulty, max_steps, description in _TASK_CATALOG
        ]
    }
# POST /grader: report the grader score for the current episode.
#
# The score always comes from env.get_grader_score(). The episode ID and step
# count are read from env.state only when the environment's internal _state is
# truthy; otherwise they fall back to None and 0 respectively.
@app.post("/grader")
def grader():
    score = env.get_grader_score()

    episode_id = None
    steps_taken = 0
    if env._state:
        snapshot = env.state
        episode_id = snapshot.episode_id
        steps_taken = snapshot.step_count

    return {
        "score": score,
        "episode_id": episode_id,
        "task_id": env._task_id,
        "steps_taken": steps_taken,
    }
# POST /baseline: placeholder endpoint. It currently returns only a fixed
# message. The em dash in that message is written as an escape (\u2014) so it
# reaches clients as a real dash instead of mis-decoded bytes.
@app.post("/baseline")
def baseline():
    # TODO Day 5: wire to baseline.py
    return {"message": "baseline endpoint \u2014 to be wired on Day 5"}
# Script entry point: serve the app with uvicorn when this module is executed
# directly. The app is passed as an import string ("server.app:app"), which
# uvicorn requires when reload is enabled.
if __name__ == "__main__":
    uvicorn.run(
        "server.app:app",
        host="0.0.0.0",
        port=7860,
        reload=True,
    )