Spaces:
Sleeping
Sleeping
File size: 3,414 Bytes
"""Data models for the LandscapeForge environment.
OptCoder actions are modelled as a single unified Action with a `kind`
discriminator. Fields are optional per-kind and validated by a model
validator so the HTTP envelope stays flat and easy to serialize.
"""
from typing import Any, Literal, Optional
from openenv.core.env_server.types import Action, Observation
from pydantic import Field, model_validator
# Closed set of REPL action kinds; serves as the `kind` discriminator value.
ActionKind = Literal[
    "run_baseline",
    "draft",
    "inspect",
    "commit",
]

# Reference optimizers that a `run_baseline` action may name.
BaselineName = Literal[
    "sgd",
    "adam",
    "momentum",
    "lbfgs",
]
# Per-action budget costs (§7.1 of LANDSCAPEFORGE_DESIGN.md).
# Keyed by action kind; `commit` is the only entry with zero cost.
ACTION_COSTS: dict[str, int] = dict(
    run_baseline=2,
    draft=2,
    inspect=1,
    commit=0,
)
class LandscapeforgeAction(Action):
    """OptCoder REPL action.

    A single class covers all four action kinds; `kind` discriminates and
    a model validator ensures each kind has its required fields.

    Per-kind requirements enforced by the validator:

    * ``run_baseline`` -- ``baseline_name`` must be set.
    * ``draft`` -- ``code`` must be a non-empty string.
    * ``inspect`` -- ``draft_idx`` must be set and ``step_range_start``
      must not exceed ``step_range_end``.
    * ``commit`` -- no extra fields required.

    Fields that do not belong to the selected kind are left at their
    defaults and are not checked.
    """

    kind: ActionKind = Field(..., description="Which REPL action")

    # run_baseline fields
    baseline_name: Optional[BaselineName] = Field(
        default=None, description="Reference optimizer to run"
    )
    # Note: steps count is env-controlled (BASELINE_STEPS in the env) — the
    # agent does not choose it. Kept off the schema so the LLM never emits it.

    # draft fields
    code: Optional[str] = Field(
        default=None, description="Full Optimizer class source (for kind='draft')"
    )

    # inspect fields
    draft_idx: Optional[int] = Field(
        default=None, ge=0, description="Which prior draft to inspect"
    )
    # Step window to inspect. The start has no upper bound of its own, so the
    # ordering against the (capped) end is enforced in the validator below.
    step_range_start: int = Field(default=0, ge=0)
    step_range_end: int = Field(default=20, ge=1, le=50)

    @model_validator(mode="after")
    def _check_kind_fields(self) -> "LandscapeforgeAction":
        """Check that the fields required by `kind` are present and coherent.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: if a field required by the selected kind is missing,
                or if an ``inspect`` action carries an inverted step range.
        """
        k = self.kind
        if k == "run_baseline" and self.baseline_name is None:
            raise ValueError("run_baseline requires baseline_name")
        if k == "draft" and not self.code:
            # `not self.code` rejects both None and the empty string.
            raise ValueError("draft requires code")
        if k == "inspect":
            if self.draft_idx is None:
                raise ValueError("inspect requires draft_idx")
            # Only a strictly inverted window is rejected: whether the end
            # bound is inclusive is decided by the env, so start == end is
            # left for the env to interpret.
            if self.step_range_start > self.step_range_end:
                raise ValueError(
                    "inspect requires step_range_start <= step_range_end"
                )
        return self
class LandscapeforgeObservation(Observation):
    """Environment state as seen by OptCoder once an action has been applied.

    Every field is either a self-describing string or structured data meant
    to be dropped into an LLM prompt; the heavier trajectory data is carried
    as JSON-serializable lists.
    """

    # --- Fixed for the whole episode ---
    landscape_description: str = ""
    dim: int = 0
    structural_hints: dict[str, Any] = Field(default_factory=dict)

    # --- REPL state, accumulating as the episode proceeds ---
    baseline_history: list[dict[str, Any]] = Field(default_factory=list)
    draft_history: list[dict[str, Any]] = Field(default_factory=list)
    inspect_requests: list[dict[str, Any]] = Field(default_factory=list)
    current_draft: Optional[str] = None
    budget_remaining: int = 0

    # --- Outcome of the step that was just taken ---
    last_action_kind: Optional[str] = None
    last_action_result: dict[str, Any] = Field(default_factory=dict)

    # --- Terminal info: filled in only after commit / budget exhaustion ---
    committed: bool = False
    final_regret: Optional[float] = None
    r_optcoder: Optional[float] = None
    r_optcoder_breakdown: dict[str, float] = Field(default_factory=dict)
|