"""Data models for the LandscapeForge environment. OptCoder actions are modelled as a single unified Action with a `kind` discriminator. Fields are optional per-kind and validated by a model validator so the HTTP envelope stays flat and easy to serialize. """ from typing import Any, Literal, Optional from openenv.core.env_server.types import Action, Observation from pydantic import Field, model_validator ActionKind = Literal["run_baseline", "draft", "inspect", "commit"] BaselineName = Literal["sgd", "adam", "momentum", "lbfgs"] # Per-action budget costs (§7.1 of LANDSCAPEFORGE_DESIGN.md). ACTION_COSTS: dict[str, int] = { "run_baseline": 2, "draft": 2, "inspect": 1, "commit": 0, } class LandscapeforgeAction(Action): """OptCoder REPL action. A single class covers all four action kinds; `kind` discriminates and a model validator ensures each kind has its required fields. """ kind: ActionKind = Field(..., description="Which REPL action") # run_baseline fields baseline_name: Optional[BaselineName] = Field( default=None, description="Reference optimizer to run" ) # Note: steps count is env-controlled (BASELINE_STEPS in the env) — the # agent does not choose it. Kept off the schema so the LLM never emits it. # draft fields code: Optional[str] = Field( default=None, description="Full Optimizer class source (for kind='draft')" ) # inspect fields draft_idx: Optional[int] = Field( default=None, ge=0, description="Which prior draft to inspect" ) step_range_start: int = Field(default=0, ge=0) step_range_end: int = Field(default=20, ge=1, le=50) @model_validator(mode="after") def _check_kind_fields(self) -> "LandscapeforgeAction": k = self.kind if k == "run_baseline" and self.baseline_name is None: raise ValueError("run_baseline requires baseline_name") if k == "draft" and not self.code: raise ValueError("draft requires code") if k == "inspect" and self.draft_idx is None: raise ValueError("inspect requires draft_idx") return self class LandscapeforgeObservation(Observation): """OptCoder's view of env state after an action. Fields are self-describing strings/structured data that fit into an LLM prompt. Heavy trajectory data is JSON-serializable lists. """ # Stable across the episode landscape_description: str = Field(default="") dim: int = Field(default=0) structural_hints: dict[str, Any] = Field(default_factory=dict) # REPL state (grows over the episode) baseline_history: list[dict[str, Any]] = Field(default_factory=list) draft_history: list[dict[str, Any]] = Field(default_factory=list) inspect_requests: list[dict[str, Any]] = Field(default_factory=list) current_draft: Optional[str] = Field(default=None) budget_remaining: int = Field(default=0) # Result of the immediate step last_action_kind: Optional[str] = Field(default=None) last_action_result: dict[str, Any] = Field(default_factory=dict) # Terminal info (only populated after commit / budget exhausted) committed: bool = Field(default=False) final_regret: Optional[float] = Field(default=None) r_optcoder: Optional[float] = Field(default=None) r_optcoder_breakdown: dict[str, float] = Field(default_factory=dict)