File size: 4,849 Bytes
"""Pydantic models for the OpenSleuth API and core state.

Backwards-compat note: any field added to ``Observation`` /
``StepResponse`` / ``State`` after v0.2 carries a default value so the
in-flight trainer (which only inspects a small subset of fields) keeps
working.
"""
from __future__ import annotations
from typing import Any, List, Literal, Optional, Tuple, Union
from pydantic import BaseModel, ConfigDict, Field
class ProbeAction(BaseModel):
    # Action payload carrying a single probe input for the target function.

    # Constant tag; the only accepted value is "probe", which tells this
    # payload apart from SubmitAction.
    action_type: Literal["probe"] = "probe"

    # The probe input travels as the text of a Python literal (e.g. "5",
    # "'abc'", "[1, 2, 3]") and is decoded server-side with
    # ast.literal_eval. Staying with a plain string avoids a class of
    # FastAPI auto-coercion bugs and matches what an LLM naturally emits.
    # No default is given, so the field is required.
    input_repr: str = Field(
        description="Python literal repr of the probe input",
    )
class SubmitAction(BaseModel):
    # Action payload carrying Python source code as the agent's submission.

    # Constant tag; the only accepted value is "submit", which tells this
    # payload apart from ProbeAction.
    action_type: Literal["submit"] = "submit"

    # Source text of the submission. No default is given, so the field is
    # required.
    code: str = Field(
        description="Python source defining the target function",
    )
# Either kind of action an agent may send; used as the type of
# ``StepRequest.action``. Each member carries its own ``action_type`` literal
# ("probe" / "submit").
Action = Union[ProbeAction, SubmitAction]
class ProbeRecord(BaseModel):
    """One entry in the probe history. Output is either the function's return
    value (Pythonic repr) or, if it raised, an error string."""

    # Text of the probe input this record belongs to.
    # NOTE(review): presumably the same string that arrived in
    # ProbeAction.input_repr -- confirm against the env code that builds
    # records.
    input_repr: str
    # Return-value repr, or the error string when the call raised (see the
    # class docstring).
    output_repr: str
    # Marks records whose output is an error string rather than a return
    # value; defaults to False.
    is_error: bool = False
    # NOTE(review): presumably the exception class name when is_error is set;
    # the code that fills this in is not in this file.
    error_type: Optional[str] = None
    # Coverage bucket label assigned by the env when the probe was recorded.
    # ``None`` for parse-error probes (we never executed the target).
    bucket: Optional[str] = None
class Observation(BaseModel):
    # Per-episode view returned inside StepResponse. Only episode_id and
    # target_function_name are required; every other field has a default.

    episode_id: str
    target_function_name: str
    target_function_signature: str = Field(
        default="",
        description="Human readable signature + docstring shown to the agent",
    )
    # Probe records accumulated so far; starts as a fresh empty list.
    probe_history: List[ProbeRecord] = Field(default_factory=list)
    # Empty string by default.
    # NOTE(review): presumably "" means the last action produced no error --
    # confirm against the env code that sets it.
    last_error: str = ""
    steps_taken: int = 0
    max_steps: int = 25

    # --- New, optional metadata fields (default-safe; trainer ignores them) ---
    difficulty: Optional[str] = Field(
        default=None,
        description="Curriculum difficulty: easy / medium / hard.",
    )
    coverage_buckets_seen: int = Field(
        default=0,
        description="How many distinct input-domain buckets the agent has probed so far.",
    )
    seen_outputs_count: int = Field(
        default=0,
        description="How many distinct outputs the target function has produced so far.",
    )
    seen_error_types_count: int = Field(
        default=0,
        description="How many distinct error types the target function has raised so far.",
    )
class StepResponse(BaseModel):
    # Response body for one environment step: the updated observation plus
    # the reward / done / info triple.

    # Agent-visible view of the episode after the step.
    observation: Observation
    # Scalar reward for the step (required, no default).
    reward: float
    # Required flag.
    # NOTE(review): presumably True once the episode has ended -- confirm
    # against the env's step logic.
    done: bool
    # Free-form extras; an unparameterised dict, so keys and values are not
    # constrained here. Defaults to a fresh empty dict per instance.
    info: dict = Field(default_factory=dict)
class State(BaseModel):
    """Internal mutable state for one episode. Not exposed in /step responses
    in full, but available via /state/{eid} for debugging."""

    # Lets pydantic accept field types it has no built-in validator for.
    # NOTE(review): none of the fields visible here obviously require it --
    # confirm whether it is still needed.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    episode_id: str
    target_function_name: str
    # Probe records for this episode; starts as a fresh empty list.
    probe_history: List[ProbeRecord] = Field(default_factory=list)
    # Untyped sets, each starting empty.
    # NOTE(review): presumably these back the coverage_buckets_seen /
    # seen_outputs_count / seen_error_types_count counters on Observation --
    # verify in the env code.
    seen_outputs: set = Field(default_factory=set)
    seen_error_types: set = Field(default_factory=set)
    seen_buckets: set = Field(default_factory=set)
    steps_taken: int = 0
    done: bool = False
    # Defaults to 0, the same default as ResetRequest.seed.
    seed: int = 0
class ResetRequest(BaseModel):
    """Reset payload.

    The original (v0.3) shape ``{"target_name": "fibonacci", "seed": 0,
    "max_steps": 25}`` still works exactly as before -- the four new fields
    below are all optional and additive so the in-flight trainer doesn't
    have to change.

    Open-ended (Level 2) targets are specified by passing ``target_code``
    + ``target_function_name`` (and optionally ``edge_cases`` and
    ``fuzz_spec``), which is then resolved via the TaskCatalog using the
    same hardened sandbox the verifier uses for agent submissions.
    """

    # Original request shape: every field has a default, so an empty body
    # validates.
    target_name: Optional[str] = None
    seed: int = 0
    max_steps: int = 25

    # --- Level 2 open-ended fields (additive, default-None) ---
    # NOTE: the "required when target_code is set" rule in the description
    # below is not enforced by this model; no validator is defined here.
    target_code: Optional[str] = Field(
        default=None,
        description=(
            "Python source defining a black-box callable. "
            "When set, overrides target_name "
            "(caller-supplied beats Hub beats builtin)."
        ),
    )
    target_function_name: Optional[str] = Field(
        default=None,
        description=(
            "Name of the callable inside target_code to use as the oracle. "
            "Required when target_code is set."
        ),
    )
    edge_cases: Optional[List[str]] = Field(
        default=None,
        description=(
            "Optional list of must-pass probe inputs as Python literal strings "
            "(e.g. ['0', '\"\"', '([1,2,3], 2)'])."
        ),
    )
    fuzz_spec: Optional[dict] = Field(
        default=None,
        description=(
            "Optional auto-fuzzer override map keyed by parameter name, "
            "e.g. {'n': {'type': 'int', 'min': 1, 'max': 90}}."
        ),
    )
class StepRequest(BaseModel):
    # Request body for one environment step.

    # Identifier of the episode the action applies to (required).
    episode_id: str
    # The action to apply: a ProbeAction or a SubmitAction (see the
    # ``Action`` alias). Required.
    action: Action
|