Spaces:

anugrah55
/

opensleuth-env-gemini-cli

Paused

App Files Files Community

opensleuth-env-gemini-cli / opensleuth_env /models.py

anugrah55

Level 2 open-ended env: auto-fuzzer + TaskCatalog + Hub-driven catalog + extended /reset

77e65fb verified 13 days ago

raw

history blame contribute delete

4.85 kB

	"""Pydantic models for the OpenSleuth API and core state.

	Backwards-compat note: any field added to ``Observation`` /
	``StepResponse`` / ``State`` after v0.2 carries a default value so the
	in-flight trainer (which only inspects a small subset of fields) keeps
	working.
	"""

	from __future__ import annotations

	from typing import Any, List, Literal, Optional, Tuple, Union
	from pydantic import BaseModel, ConfigDict, Field


	class ProbeAction(BaseModel):
	    """Action carrying a single probe input for the target function."""

	    action_type: Literal["probe"] = "probe"
	    # ``input_repr`` stays a plain string holding a Python literal such as
	    # "5", "'abc'" or "[1, 2, 3]"; the server parses it with
	    # ast.literal_eval. A string sidesteps a class of FastAPI auto-coercion
	    # bugs and matches what an LLM naturally emits.
	    input_repr: str = Field(description="Python literal repr of the probe input")


	class SubmitAction(BaseModel):
	    """Action carrying the agent's submitted source code."""

	    action_type: Literal["submit"] = "submit"
	    code: str = Field(description="Python source defining the target function")


	# Either action an agent may send; ``StepRequest.action`` is typed as this.
	# Plain ``Union`` with no explicit discriminator configured -- the members
	# differ by their ``action_type`` literal and by their required field.
	Action = Union[ProbeAction, SubmitAction]


	class ProbeRecord(BaseModel):
	    """A single entry in the probe history.

	    ``output_repr`` holds the function's return value (Pythonic repr) or,
	    when the call raised, an error string.
	    """

	    input_repr: str
	    output_repr: str
	    is_error: bool = False
	    error_type: Optional[str] = None
	    # Label of the coverage bucket the env assigned when this probe was
	    # recorded. Stays ``None`` for parse-error probes, where the target was
	    # never executed.
	    bucket: Optional[str] = None


	class Observation(BaseModel):
	    """Episode snapshot carried in ``StepResponse.observation``."""

	    episode_id: str
	    target_function_name: str
	    target_function_signature: str = Field(
	        default="",
	        description="Human readable signature + docstring shown to the agent",
	    )
	    probe_history: List[ProbeRecord] = Field(default_factory=list)
	    last_error: str = ""
	    steps_taken: int = 0
	    max_steps: int = 25

	    # Optional metadata added later. Every field below has a default, so the
	    # trainer (which ignores them) is unaffected.
	    difficulty: Optional[str] = Field(
	        default=None,
	        description="Curriculum difficulty: easy / medium / hard.",
	    )
	    coverage_buckets_seen: int = Field(
	        default=0,
	        description=(
	            "How many distinct input-domain buckets the agent has probed "
	            "so far."
	        ),
	    )
	    seen_outputs_count: int = Field(
	        default=0,
	        description=(
	            "How many distinct outputs the target function has produced "
	            "so far."
	        ),
	    )
	    seen_error_types_count: int = Field(
	        default=0,
	        description=(
	            "How many distinct error types the target function has raised "
	            "so far."
	        ),
	    )


	class StepResponse(BaseModel):
	    """Observation, reward, done flag and free-form info for one step."""

	    observation: Observation
	    reward: float
	    done: bool
	    # Fresh dict per instance; no schema is imposed on its contents here.
	    info: dict = Field(default_factory=dict)


	class State(BaseModel):
	    """Mutable internal state of a single episode.

	    /step responses do not expose it in full; /state/{eid} makes it
	    available for debugging.
	    """

	    model_config = ConfigDict(arbitrary_types_allowed=True)

	    episode_id: str
	    target_function_name: str
	    probe_history: List[ProbeRecord] = Field(default_factory=list)
	    # Untyped sets, each starting empty per instance; the matching
	    # ``*_count`` fields on ``Observation`` are presumably derived from
	    # these -- confirm against the env code.
	    seen_outputs: set = Field(default_factory=set)
	    seen_error_types: set = Field(default_factory=set)
	    seen_buckets: set = Field(default_factory=set)
	    steps_taken: int = 0
	    done: bool = False
	    seed: int = 0


	class ResetRequest(BaseModel):
	    """Reset payload.

	    Backwards compatibility: the original (v0.3) shape
	    ``{"target_name": "fibonacci", "seed": 0, "max_steps": 25}`` still
	    works exactly as before. The four Level 2 fields below are all optional
	    and additive, so the in-flight trainer doesn't have to change.

	    Open-ended (Level 2) targets: pass ``target_code`` together with
	    ``target_function_name`` (``edge_cases`` and ``fuzz_spec`` are
	    optional). The target is then resolved via the TaskCatalog using the
	    same hardened sandbox the verifier uses for agent submissions.
	    """

	    target_name: Optional[str] = None
	    seed: int = 0
	    max_steps: int = 25

	    # Level 2 open-ended fields: purely additive, each defaulting to None.
	    target_code: Optional[str] = Field(
	        None,
	        description=(
	            "Python source defining a black-box callable. "
	            "When set, overrides target_name "
	            "(caller-supplied beats Hub beats builtin)."
	        ),
	    )
	    target_function_name: Optional[str] = Field(
	        None,
	        description=(
	            "Name of the callable inside target_code to use as the oracle. "
	            "Required when target_code is set."
	        ),
	    )
	    edge_cases: Optional[List[str]] = Field(
	        None,
	        description=(
	            "Optional list of must-pass probe inputs as Python literal "
	            "strings (e.g. ['0', '\"\"', '([1,2,3], 2)'])."
	        ),
	    )
	    fuzz_spec: Optional[dict] = Field(
	        None,
	        description=(
	            "Optional auto-fuzzer override map keyed by parameter name, "
	            "e.g. {'n': {'type': 'int', 'min': 1, 'max': 90}}."
	        ),
	    )


	class StepRequest(BaseModel):
	    """Request payload: the episode to act on and the action to apply."""

	    episode_id: str
	    # One of ``ProbeAction`` / ``SubmitAction`` (see the ``Action`` alias).
	    action: Action