"""Pydantic models for the OpenSleuth API and core state. Backwards-compat note: any field added to ``Observation`` / ``StepResponse`` /``State`` after v0.2 carries a default value so the in-flight trainer (which only inspects a small subset of fields) keeps working. """ from __future__ import annotations from typing import Any, List, Literal, Optional, Tuple, Union from pydantic import BaseModel, ConfigDict, Field class ProbeAction(BaseModel): action_type: Literal["probe"] = "probe" # The agent submits inputs as a Python literal string (e.g. "5", "'abc'", # "[1, 2, 3]"). We parse it server-side with ast.literal_eval. Keeping it # as a string avoids a class of FastAPI auto-coercion bugs and matches # what an LLM naturally emits. input_repr: str = Field(..., description="Python literal repr of the probe input") class SubmitAction(BaseModel): action_type: Literal["submit"] = "submit" code: str = Field(..., description="Python source defining the target function") Action = Union[ProbeAction, SubmitAction] class ProbeRecord(BaseModel): """One entry in the probe history. Output is either the function's return value (Pythonic repr) or, if it raised, an error string.""" input_repr: str output_repr: str is_error: bool = False error_type: Optional[str] = None # Coverage bucket label assigned by the env when the probe was recorded. # ``None`` for parse-error probes (we never executed the target). bucket: Optional[str] = None class Observation(BaseModel): episode_id: str target_function_name: str target_function_signature: str = Field( "", description="Human readable signature + docstring shown to the agent" ) probe_history: List[ProbeRecord] = Field(default_factory=list) last_error: str = "" steps_taken: int = 0 max_steps: int = 25 # --- New, optional metadata fields (default-safe; trainer ignores them) --- difficulty: Optional[str] = Field( None, description="Curriculum difficulty: easy / medium / hard." ) coverage_buckets_seen: int = Field( 0, description="How many distinct input-domain buckets the agent has probed so far." ) seen_outputs_count: int = Field( 0, description="How many distinct outputs the target function has produced so far." ) seen_error_types_count: int = Field( 0, description="How many distinct error types the target function has raised so far." ) class StepResponse(BaseModel): observation: Observation reward: float done: bool info: dict = Field(default_factory=dict) class State(BaseModel): """Internal mutable state for one episode. Not exposed in /step responses in full, but available via /state/{eid} for debugging.""" model_config = ConfigDict(arbitrary_types_allowed=True) episode_id: str target_function_name: str probe_history: List[ProbeRecord] = Field(default_factory=list) seen_outputs: set = Field(default_factory=set) seen_error_types: set = Field(default_factory=set) seen_buckets: set = Field(default_factory=set) steps_taken: int = 0 done: bool = False seed: int = 0 class ResetRequest(BaseModel): """Reset payload. The original (v0.3) shape ``{"target_name": "fibonacci", "seed": 0, "max_steps": 25}`` still works exactly as before -- the four new fields below are all optional and additive so the in-flight trainer doesn't have to change. Open-ended (Level 2) targets are specified by passing ``target_code`` + ``target_function_name`` (and optionally ``edge_cases`` and ``fuzz_spec``), which is then resolved via the TaskCatalog using the same hardened sandbox the verifier uses for agent submissions. """ target_name: Optional[str] = None seed: int = 0 max_steps: int = 25 # --- Level 2 open-ended fields (additive, default-None) --- target_code: Optional[str] = Field( default=None, description="Python source defining a black-box callable. When set, " "overrides target_name (caller-supplied beats Hub beats builtin).", ) target_function_name: Optional[str] = Field( default=None, description="Name of the callable inside target_code to use as the " "oracle. Required when target_code is set.", ) edge_cases: Optional[List[str]] = Field( default=None, description="Optional list of must-pass probe inputs as Python " "literal strings (e.g. ['0', '\"\"', '([1,2,3], 2)']).", ) fuzz_spec: Optional[dict] = Field( default=None, description="Optional auto-fuzzer override map keyed by parameter " "name, e.g. {'n': {'type': 'int', 'min': 1, 'max': 90}}.", ) class StepRequest(BaseModel): episode_id: str action: Action