File size: 4,849 Bytes
536dda7
 
 
 
 
 
d3cd20c
 
 
 
 
 
ee14542
 
 
d3cd20c
 
 
 
 
 
ee14542
 
 
d3cd20c
 
ee14542
 
 
d3cd20c
 
 
 
 
 
 
 
 
536dda7
 
 
d3cd20c
 
ee14542
d3cd20c
 
 
 
ee14542
d3cd20c
 
 
 
536dda7
 
 
 
 
 
 
 
 
 
 
 
 
d3cd20c
 
 
 
 
 
 
 
ee14542
 
d3cd20c
 
 
 
 
 
ee14542
d3cd20c
 
 
536dda7
d3cd20c
 
 
 
 
 
77e65fb
 
 
 
 
 
 
 
 
 
 
 
 
 
d3cd20c
 
77e65fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d3cd20c
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
"""Pydantic models for the OpenSleuth API and core state.

Backwards-compat note: any field added to ``Observation`` /
``StepResponse`` / ``State`` after v0.2 carries a default value so the in-flight
trainer (which only inspects a small subset of fields) keeps working.
"""

from __future__ import annotations

from typing import Any, List, Literal, Optional, Tuple, Union
from pydantic import BaseModel, ConfigDict, Field


class ProbeAction(BaseModel):
    """Action asking the environment to probe the target with one input.

    The input is carried as text holding a Python literal (e.g. ``"5"``,
    ``"'abc'"``, ``"[1, 2, 3]"``) and is parsed server-side with
    ``ast.literal_eval``. Shipping a string rather than a typed value avoids
    a class of FastAPI auto-coercion bugs and matches what an LLM naturally
    emits.
    """

    # Constant tag naming this action variant; it has a default, so callers
    # may omit it.
    action_type: Literal["probe"] = Field(default="probe")
    # Required: no default is declared for this field.
    input_repr: str = Field(description="Python literal repr of the probe input")


class SubmitAction(BaseModel):
    """Action carrying the Python source the agent submits."""

    # Constant tag naming this action variant; it has a default, so callers
    # may omit it.
    action_type: Literal["submit"] = Field(default="submit")
    # Required: no default is declared for this field.
    code: str = Field(description="Python source defining the target function")


# Any action an agent can send. Each member pins ``action_type`` to a
# different literal ("probe" / "submit").
Action = Union[ProbeAction, SubmitAction]


class ProbeRecord(BaseModel):
    """One entry in the probe history.

    ``output_repr`` is either the function's return value (Pythonic repr)
    or, if it raised, an error string.
    """

    # Text of the probe input this record describes.
    input_repr: str
    # Return-value repr on success, error string when the function raised.
    output_repr: str
    # NOTE(review): presumably True when ``output_repr`` holds an error string
    # rather than a return value -- confirm against the code building records.
    is_error: bool = False
    # NOTE(review): presumably the exception type name for failed probes and
    # ``None`` otherwise -- confirm.
    error_type: Optional[str] = None
    # Coverage bucket label assigned by the env when the probe was recorded.
    # ``None`` for parse-error probes (we never executed the target).
    bucket: Optional[str] = None


class Observation(BaseModel):
    """Per-episode view handed to the agent.

    Only ``episode_id`` and ``target_function_name`` are required; every
    other field has a default, so consumers that read just a subset of the
    fields keep working when new ones are added.
    """

    episode_id: str
    target_function_name: str
    target_function_signature: str = Field(
        default="",
        description="Human readable signature + docstring shown to the agent",
    )
    # ``default_factory`` gives each observation its own list.
    probe_history: List[ProbeRecord] = Field(default_factory=list)
    last_error: str = ""
    steps_taken: int = 0
    max_steps: int = 25

    # --- Newer, optional metadata (default-safe; the trainer ignores it) ---
    difficulty: Optional[str] = Field(
        default=None,
        description="Curriculum difficulty: easy / medium / hard.",
    )
    coverage_buckets_seen: int = Field(
        default=0,
        description="How many distinct input-domain buckets the agent has probed so far.",
    )
    seen_outputs_count: int = Field(
        default=0,
        description="How many distinct outputs the target function has produced so far.",
    )
    seen_error_types_count: int = Field(
        default=0,
        description="How many distinct error types the target function has raised so far.",
    )


class StepResponse(BaseModel):
    """An observation bundled with a reward, a done flag and an info dict."""

    # NOTE(review): presumably the body returned by the /step endpoint after
    # an action is applied -- confirm against the route handler.
    observation: Observation
    reward: float
    done: bool
    # Free-form dict (keys and values are not validated); each instance gets
    # its own empty dict by default.
    info: dict = Field(default_factory=dict)


class State(BaseModel):
    """Internal mutable state for one episode.

    Not exposed in /step responses in full, but available via /state/{eid}
    for debugging.
    """

    # NOTE(review): every field declared below uses a builtin or pydantic
    # type, so ``arbitrary_types_allowed`` may be unnecessary -- confirm
    # (e.g. subclasses elsewhere) before removing it.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    episode_id: str
    target_function_name: str
    # ``default_factory`` gives every State its own list / sets rather than a
    # shared mutable default.
    probe_history: List[ProbeRecord] = Field(default_factory=list)
    # Unparameterised sets: element types are not validated.
    seen_outputs: set = Field(default_factory=set)
    seen_error_types: set = Field(default_factory=set)
    seen_buckets: set = Field(default_factory=set)
    steps_taken: int = 0
    done: bool = False
    seed: int = 0


class ResetRequest(BaseModel):
    """Payload for resetting (starting) an episode.

    The original (v0.3) shape -- ``{"target_name": "fibonacci", "seed": 0,
    "max_steps": 25}`` -- keeps working exactly as before: the four Level 2
    fields further down are optional and purely additive, so the in-flight
    trainer needs no changes.

    An open-ended (Level 2) target is described by sending ``target_code``
    together with ``target_function_name`` (plus, optionally,
    ``edge_cases`` and ``fuzz_spec``). It is then resolved via the
    TaskCatalog using the same hardened sandbox the verifier uses for agent
    submissions.
    """

    # --- Original fields ---
    target_name: Optional[str] = None
    seed: int = 0
    max_steps: int = 25

    # --- Level 2 open-ended fields (additive, default-None) ---
    target_code: Optional[str] = Field(
        default=None,
        description=(
            "Python source defining a black-box callable. When set, "
            "overrides target_name (caller-supplied beats Hub beats builtin)."
        ),
    )
    # The description calls this required alongside target_code; nothing in
    # this model enforces that pairing.
    target_function_name: Optional[str] = Field(
        default=None,
        description=(
            "Name of the callable inside target_code to use as the "
            "oracle. Required when target_code is set."
        ),
    )
    edge_cases: Optional[List[str]] = Field(
        default=None,
        description=(
            "Optional list of must-pass probe inputs as Python "
            "literal strings (e.g. ['0', '\"\"', '([1,2,3], 2)'])."
        ),
    )
    fuzz_spec: Optional[dict] = Field(
        default=None,
        description=(
            "Optional auto-fuzzer override map keyed by parameter "
            "name, e.g. {'n': {'type': 'int', 'min': 1, 'max': 90}}."
        ),
    )


class StepRequest(BaseModel):
    """Request pairing an episode id with the action to apply."""

    # Identifier of the episode the action is addressed to.
    episode_id: str
    # Plain (undiscriminated) union of ProbeAction / SubmitAction.
    # NOTE(review): a discriminated union on ``action_type`` would give
    # clearer validation errors, but would make ``action_type`` mandatory in
    # payloads, whereas today it has a default -- confirm before changing.
    action: Action