anugrah55's picture
Level 2 open-ended env: auto-fuzzer + TaskCatalog + Hub-driven catalog + extended /reset
77e65fb verified
"""Pydantic models for the OpenSleuth API and core state.
Backwards-compat note: any field added to ``Observation`` /
``StepResponse`` / ``State`` after v0.2 carries a default value so the
in-flight trainer (which only inspects a small subset of fields) keeps working.
"""
from __future__ import annotations
from typing import Any, List, Literal, Optional, Tuple, Union
from pydantic import BaseModel, ConfigDict, Field
class ProbeAction(BaseModel):
    # Agent action carrying a single probe input for the target function.

    # Fixed tag identifying this action variant; defaults to "probe".
    action_type: Literal["probe"] = "probe"

    # The probe input travels as Python-literal source text (for example
    # "5", "'abc'" or "[1, 2, 3]") and is parsed server-side with
    # ast.literal_eval. A plain string sidesteps a class of FastAPI
    # auto-coercion bugs and is the form an LLM naturally emits.
    input_repr: str = Field(
        ...,
        description="Python literal repr of the probe input",
    )
class SubmitAction(BaseModel):
    # Agent action carrying the source code of a candidate implementation.

    # Fixed tag identifying this action variant; defaults to "submit".
    action_type: Literal["submit"] = "submit"

    # Required: the submitted Python source text.
    code: str = Field(
        ...,
        description="Python source defining the target function",
    )
# Either kind of agent action. The two member models carry different
# ``action_type`` literals ("probe" vs "submit").
Action = Union[ProbeAction, SubmitAction]
class ProbeRecord(BaseModel):
    """One entry in the probe history. Output is either the function's return
    value (Pythonic repr) or, if it raised, an error string."""

    # The probe input and the recorded outcome, both as text.
    input_repr: str
    output_repr: str

    # Error bookkeeping: flag plus an optional error type name.
    is_error: bool = False
    error_type: Optional[str] = None

    # Coverage bucket label the env assigned when the probe was recorded.
    # Stays ``None`` for parse-error probes, because the target was never
    # executed for those.
    bucket: Optional[str] = None
class Observation(BaseModel):
    # Agent-facing view of an episode; embedded in ``StepResponse``.

    # --- Core fields ---
    episode_id: str
    target_function_name: str
    target_function_signature: str = Field(
        default="",
        description="Human readable signature + docstring shown to the agent",
    )
    probe_history: List[ProbeRecord] = Field(default_factory=list)
    last_error: str = ""
    steps_taken: int = 0
    max_steps: int = 25

    # --- New, optional metadata fields (default-safe; trainer ignores them) ---
    difficulty: Optional[str] = Field(
        default=None,
        description="Curriculum difficulty: easy / medium / hard.",
    )
    coverage_buckets_seen: int = Field(
        default=0,
        description="How many distinct input-domain buckets the agent has probed so far.",
    )
    seen_outputs_count: int = Field(
        default=0,
        description="How many distinct outputs the target function has produced so far.",
    )
    seen_error_types_count: int = Field(
        default=0,
        description="How many distinct error types the target function has raised so far.",
    )
class StepResponse(BaseModel):
    # Bundles an observation with a reward, a done flag and an info mapping.

    observation: Observation
    reward: float
    done: bool
    # Each instance gets its own fresh dict; nothing is shared between responses.
    info: dict = Field(default_factory=dict)
class State(BaseModel):
    """Internal mutable state for one episode. Not exposed in /step responses
    in full, but available via /state/{eid} for debugging."""

    # Allow field types that pydantic has no built-in validator for.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Identity of the episode and its target.
    episode_id: str
    target_function_name: str

    # Probe log plus the de-duplicated sets tracked alongside it. Every
    # container uses a factory so instances never share a mutable default.
    probe_history: List[ProbeRecord] = Field(default_factory=list)
    seen_outputs: set = Field(default_factory=set)
    seen_error_types: set = Field(default_factory=set)
    seen_buckets: set = Field(default_factory=set)

    # Progress counters and bookkeeping.
    steps_taken: int = 0
    done: bool = False
    seed: int = 0
class ResetRequest(BaseModel):
    """Reset payload.

    The original (v0.3) shape ``{"target_name": "fibonacci", "seed": 0,
    "max_steps": 25}`` still works exactly as before -- the four new fields
    below are all optional and additive so the in-flight trainer doesn't
    have to change.

    Open-ended (Level 2) targets are specified by passing ``target_code``
    + ``target_function_name`` (and optionally ``edge_cases`` and
    ``fuzz_spec``), which is then resolved via the TaskCatalog using the
    same hardened sandbox the verifier uses for agent submissions.
    """

    # --- Original fields; every one has a default ---
    target_name: Optional[str] = None
    seed: int = 0
    max_steps: int = 25

    # --- Level 2 open-ended fields (additive, default-None) ---
    # NOTE(review): the "required when target_code is set" rule stated in the
    # description below is not enforced by this model; presumably the request
    # handler checks it -- confirm.
    target_code: Optional[str] = Field(
        default=None,
        description=(
            "Python source defining a black-box callable. When set, "
            "overrides target_name (caller-supplied beats Hub beats builtin)."
        ),
    )
    target_function_name: Optional[str] = Field(
        default=None,
        description=(
            "Name of the callable inside target_code to use as the "
            "oracle. Required when target_code is set."
        ),
    )
    edge_cases: Optional[List[str]] = Field(
        default=None,
        description=(
            "Optional list of must-pass probe inputs as Python "
            "literal strings (e.g. ['0', '\"\"', '([1,2,3], 2)'])."
        ),
    )
    fuzz_spec: Optional[dict] = Field(
        default=None,
        description=(
            "Optional auto-fuzzer override map keyed by parameter "
            "name, e.g. {'n': {'type': 'int', 'min': 1, 'max': 90}}."
        ),
    )
class StepRequest(BaseModel):
    # Pairs an episode id with one action (a probe or a submit).

    episode_id: str
    action: Action