| """Pydantic models for the OpenSleuth API and core state. |
| |
Backwards-compat note: any field added to ``Observation`` /
``StepResponse`` / ``State`` after v0.2 carries a default value, so the
in-flight trainer (which only inspects a small subset of fields) keeps working.
| """ |
|
|
| from __future__ import annotations |
|
|
| from typing import Any, List, Literal, Optional, Tuple, Union |
| from pydantic import BaseModel, ConfigDict, Field |
|
|
|
|
class ProbeAction(BaseModel):
    """Action asking for the target function to be probed with one input.

    The ``action_type`` tag is fixed to ``"probe"``; ``input_repr`` is
    required and carries the probe input as a Python literal repr.
    """

    # Constant tag; defaults to the only value the Literal admits.
    action_type: Literal["probe"] = Field(default="probe")

    # Required: no default is supplied.
    input_repr: str = Field(
        description="Python literal repr of the probe input",
    )
|
|
|
|
class SubmitAction(BaseModel):
    """Action submitting candidate source code for the target function.

    The ``action_type`` tag is fixed to ``"submit"``; ``code`` is required.
    """

    # Constant tag; defaults to the only value the Literal admits.
    action_type: Literal["submit"] = Field(default="submit")

    # Required: no default is supplied.
    code: str = Field(
        description="Python source defining the target function",
    )
|
|
|
|
# An action is either a probe or a submission; ``StepRequest.action`` is
# annotated with this alias.
Action = Union[
    ProbeAction,
    SubmitAction,
]
|
|
|
|
class ProbeRecord(BaseModel):
    """A single entry of the probe history.

    ``output_repr`` holds either the Pythonic repr of the value the function
    returned or, when the call raised, an error string.
    """

    input_repr: str
    output_repr: str

    # Error bookkeeping; both default to the "no error" state.
    is_error: bool = Field(default=False)
    error_type: Optional[str] = Field(default=None)

    # NOTE(review): presumably the input-domain bucket this probe fell into
    # (cf. ``Observation.coverage_buckets_seen``) -- confirm with the caller.
    bucket: Optional[str] = Field(default=None)
|
|
|
|
class Observation(BaseModel):
    """Per-episode observation model.

    Only ``episode_id`` and ``target_function_name`` are required; every
    other field carries a default value.
    """

    episode_id: str
    target_function_name: str
    target_function_signature: str = Field(
        default="",
        description="Human readable signature + docstring shown to the agent",
    )
    probe_history: List[ProbeRecord] = Field(default_factory=list)
    last_error: str = Field(default="")
    steps_taken: int = Field(default=0)
    max_steps: int = Field(default=25)

    difficulty: Optional[str] = Field(
        default=None,
        description="Curriculum difficulty: easy / medium / hard.",
    )

    # Running counters, each starting at zero.
    coverage_buckets_seen: int = Field(
        default=0,
        description=(
            "How many distinct input-domain buckets "
            "the agent has probed so far."
        ),
    )
    seen_outputs_count: int = Field(
        default=0,
        description=(
            "How many distinct outputs "
            "the target function has produced so far."
        ),
    )
    seen_error_types_count: int = Field(
        default=0,
        description=(
            "How many distinct error types "
            "the target function has raised so far."
        ),
    )
|
|
|
|
class StepResponse(BaseModel):
    """Step result: an observation, a reward, a done flag and extra info."""

    # Required fields.
    observation: Observation
    reward: float
    done: bool

    # Free-form extras; a fresh empty dict is created per instance.
    info: dict = Field(default_factory=dict)
|
|
|
|
class State(BaseModel):
    """Internal, mutable per-episode state.

    It is not exposed in full in /step responses, but can be fetched via
    /state/{eid} for debugging.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    episode_id: str
    target_function_name: str
    probe_history: List[ProbeRecord] = Field(default_factory=list)

    # Untyped sets; each instance gets its own fresh empty set.
    seen_outputs: set = Field(default_factory=set)
    seen_error_types: set = Field(default_factory=set)
    seen_buckets: set = Field(default_factory=set)

    steps_taken: int = Field(default=0)
    done: bool = Field(default=False)
    seed: int = Field(default=0)
|
|
|
|
class ResetRequest(BaseModel):
    """Payload for resetting an episode.

    The original (v0.3) shape ``{"target_name": "fibonacci", "seed": 0,
    "max_steps": 25}`` keeps working exactly as before: the four newer
    fields (``target_code``, ``target_function_name``, ``edge_cases`` and
    ``fuzz_spec``) are all optional and additive, so the in-flight trainer
    doesn't have to change.

    Open-ended (Level 2) targets are specified by passing ``target_code``
    + ``target_function_name`` (and optionally ``edge_cases`` and
    ``fuzz_spec``), which is then resolved via the TaskCatalog using the
    same hardened sandbox the verifier uses for agent submissions.
    """

    # Original v0.3 fields.
    target_name: Optional[str] = Field(default=None)
    seed: int = Field(default=0)
    max_steps: int = Field(default=25)

    # Newer, optional fields for caller-supplied targets.
    target_code: Optional[str] = Field(
        None,
        description=(
            "Python source defining a black-box callable. "
            "When set, overrides target_name "
            "(caller-supplied beats Hub beats builtin)."
        ),
    )
    # NOTE(review): the "required when target_code is set" rule is not
    # enforced by this model; presumably it is checked downstream -- confirm.
    target_function_name: Optional[str] = Field(
        None,
        description=(
            "Name of the callable inside target_code "
            "to use as the oracle. "
            "Required when target_code is set."
        ),
    )
    edge_cases: Optional[List[str]] = Field(
        None,
        description=(
            "Optional list of must-pass probe inputs "
            "as Python literal strings "
            "(e.g. ['0', '\"\"', '([1,2,3], 2)'])."
        ),
    )
    fuzz_spec: Optional[dict] = Field(
        None,
        description=(
            "Optional auto-fuzzer override map "
            "keyed by parameter name, "
            "e.g. {'n': {'type': 'int', 'min': 1, 'max': 90}}."
        ),
    )
|
|
|
|
class StepRequest(BaseModel):
    """Request model pairing an episode id with the action to apply."""

    # Both fields are required.
    episode_id: str

    # ``Action`` is the ProbeAction / SubmitAction union.
    action: Action
|
|