File size: 3,414 Bytes
b0b140b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
"""Data models for the LandscapeForge environment.

OptCoder actions are modelled as a single unified Action with a `kind`
discriminator. Fields are optional per-kind and validated by a model
validator so the HTTP envelope stays flat and easy to serialize.
"""

from typing import Any, Literal, Optional

from openenv.core.env_server.types import Action, Observation
from pydantic import Field, model_validator


# Closed vocabularies used by the action envelope.
ActionKind = Literal[
    "run_baseline",
    "draft",
    "inspect",
    "commit",
]
BaselineName = Literal[
    "sgd",
    "adam",
    "momentum",
    "lbfgs",
]


# Budget cost of each action kind (§7.1 of LANDSCAPEFORGE_DESIGN.md).
# Keys mirror the members of ActionKind.
ACTION_COSTS: dict[str, int] = {
    "run_baseline": 2,
    "draft": 2,
    "inspect": 1,
    "commit": 0,
}


class LandscapeforgeAction(Action):
    """OptCoder REPL action.

    A single class covers all four action kinds; `kind` discriminates and
    a model validator ensures each kind has its required fields:

    * ``run_baseline`` requires ``baseline_name``.
    * ``draft`` requires a non-empty ``code``.
    * ``inspect`` requires ``draft_idx`` and a step window whose start does
      not exceed its end.
    * ``commit`` needs no additional fields.

    Fields that do not belong to the selected kind are left unvalidated
    beyond their own per-field constraints.
    """

    kind: ActionKind = Field(..., description="Which REPL action")

    # run_baseline fields
    baseline_name: Optional[BaselineName] = Field(
        default=None, description="Reference optimizer to run"
    )
    # Note: steps count is env-controlled (BASELINE_STEPS in the env) — the
    # agent does not choose it. Kept off the schema so the LLM never emits it.

    # draft fields
    code: Optional[str] = Field(
        default=None, description="Full Optimizer class source (for kind='draft')"
    )

    # inspect fields
    draft_idx: Optional[int] = Field(
        default=None, ge=0, description="Which prior draft to inspect"
    )
    # Step window to inspect. The per-field bounds alone cannot relate the
    # two values to each other (start is unbounded above while end is capped
    # at 50), so the cross-field check lives in the validator below.
    step_range_start: int = Field(default=0, ge=0)
    step_range_end: int = Field(default=20, ge=1, le=50)

    @model_validator(mode="after")
    def _check_kind_fields(self) -> "LandscapeforgeAction":
        """Enforce the per-kind field requirements after field validation.

        Returns:
            The validated instance, unchanged.

        Raises:
            ValueError: if a field required by ``kind`` is missing or empty,
                or if an ``inspect`` action has ``step_range_start`` greater
                than ``step_range_end``. Pydantic surfaces this as a
                validation error on the model.
        """
        k = self.kind
        if k == "run_baseline" and self.baseline_name is None:
            raise ValueError("run_baseline requires baseline_name")
        # `not self.code` rejects both None and the empty string.
        if k == "draft" and not self.code:
            raise ValueError("draft requires code")
        if k == "inspect":
            if self.draft_idx is None:
                raise ValueError("inspect requires draft_idx")
            # Only an inverted window is rejected. start == end is still
            # accepted because whether the end bound is inclusive is decided
            # by the env — TODO confirm and tighten to `>=` if half-open.
            if self.step_range_start > self.step_range_end:
                raise ValueError(
                    "inspect requires step_range_start <= step_range_end"
                )
        return self


class LandscapeforgeObservation(Observation):
    """Snapshot of the environment state handed to OptCoder after an action.

    Every field carries a default, so an observation can be built with only
    the values that are known at that point. Text fields are meant to be
    dropped straight into an LLM prompt; bulkier trajectory data is kept as
    JSON-serializable lists of dicts.
    """

    # --- Episode constants: do not change between steps ---
    landscape_description: str = ""
    dim: int = 0
    structural_hints: dict[str, Any] = Field(default_factory=dict)

    # --- REPL state: accumulates as the episode progresses ---
    baseline_history: list[dict[str, Any]] = Field(default_factory=list)
    draft_history: list[dict[str, Any]] = Field(default_factory=list)
    inspect_requests: list[dict[str, Any]] = Field(default_factory=list)

    current_draft: Optional[str] = None
    budget_remaining: int = 0

    # --- Outcome of the step that produced this observation ---
    last_action_kind: Optional[str] = None
    last_action_result: dict[str, Any] = Field(default_factory=dict)

    # --- Terminal info: filled in only after commit / budget exhausted ---
    committed: bool = False
    final_regret: Optional[float] = None
    r_optcoder: Optional[float] = None
    r_optcoder_breakdown: dict[str, float] = Field(default_factory=dict)