File size: 3,555 Bytes
72de9a9
 
 
 
 
 
 
a4715a7
72de9a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a4715a7
 
 
 
 
 
 
 
 
 
 
 
d73bfc0
 
 
 
 
 
 
a4715a7
 
 
 
 
 
 
d73bfc0
 
 
 
 
 
 
 
 
 
 
 
 
 
a4715a7
 
 
 
 
 
 
 
d73bfc0
 
 
 
 
 
 
a4715a7
 
 
 
 
 
 
 
 
 
 
d73bfc0
 
 
 
 
a4715a7
d73bfc0
a4715a7
 
 
72de9a9
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
"""
FastAPI application for the API Debug Environment.

Uses OpenEnv's create_app() to generate all endpoints:
POST /reset, POST /step, GET /state, GET /schema, WS /ws, GET /health
"""

from fastapi.responses import JSONResponse
from openenv.core.env_server.http_server import create_app

try:
    from ..models import APIDebugAction, APIDebugObservation
    from .environment import APIDebugEnvironment
except ImportError:
    from models import APIDebugAction, APIDebugObservation
    from server.environment import APIDebugEnvironment

# Build the FastAPI application through OpenEnv's create_app(), handing it the
# environment class together with its action and observation types. Extra
# routes defined in this module are attached to the returned ``app``.
app = create_app(
    APIDebugEnvironment,
    APIDebugAction,
    APIDebugObservation,
    env_name="api_debug",
    # NOTE(review): presumably caps the number of simultaneously live
    # environment instances — confirm against the OpenEnv documentation.
    max_concurrent_envs=10,
)


@app.get("/tasks")
def list_tasks():
    """Describe the task catalogue of this environment as a JSON document.

    The response body holds, in this order: ``tasks`` (one entry per task
    with its name, step budget, error count, grading scheme and description),
    ``response_issue_types``, ``error_types`` and ``api_spec_count``.
    """
    deterministic = "deterministic"

    # One row per task; columns follow ``task_fields`` below.
    task_fields = ("name", "max_steps", "error_count", "grading", "description")
    task_rows = (
        ("easy", 3, 1, deterministic,
         "Identify the error type and affected fields"),
        ("classify", 4, "2-3", deterministic,
         "Identify ALL error types and affected fields across multiple errors"),
        ("medium", 5, 1, deterministic,
         "Fix the broken request to match the API spec"),
        ("headers", 4, 1, deterministic,
         "Fix request headers (auth, content-type, tokens)"),
        ("response", 4, "1-2", deterministic,
         "Validate API response: identify wrong status codes, missing fields, type errors, data leaks"),
        ("hard", 7, "2-3", "70% deterministic + 30% LLM-as-judge",
         "Fix the request and explain the fix for developers"),
    )
    # zip() pairs each column name with its value, so every task dict keeps
    # the same key order as ``task_fields``.
    tasks = [dict(zip(task_fields, row)) for row in task_rows]

    response_issue_types = [
        "wrong_status_code",
        "missing_response_field",
        "wrong_response_type",
        "extra_response_field",
        "inconsistent_error_format",
    ]

    error_types = [
        "missing_required_field",
        "wrong_field_type",
        "invalid_email_format",
        "missing_auth_header",
        "extra_unknown_field",
        "null_value_in_required",
        "wrong_http_method",
        "malformed_json_value",
        "invalid_enum_value",
        "datetime_format_error",
        "wrong_content_type",
        "expired_auth_token",
        "wrong_status_code",
        "redirect_loop",
        "rate_limit_headers",
    ]

    payload = {
        "tasks": tasks,
        "response_issue_types": response_issue_types,
        "error_types": error_types,
        "api_spec_count": 45,
    }
    return JSONResponse(payload)


def main():
    """Serve ``app`` with uvicorn on host ``0.0.0.0``.

    The port comes from the first command-line argument when one is given;
    otherwise 8000 is used. A non-integer argument raises ``ValueError``
    from ``int()``.
    """
    import sys
    import uvicorn

    cli_args = sys.argv[1:]
    listen_port = int(cli_args[0]) if cli_args else 8000

    uvicorn.run(app, host="0.0.0.0", port=listen_port)


# Start the server only when this file is executed as a script, not when it
# is imported (e.g. by an ASGI server that loads ``app`` directly).
if __name__ == "__main__":
    main()