| |
| |
| |
| |
| |
|
|
"""
Data models for the My Env environment.

The my_env environment is a simple test environment that echoes back messages.
The meta-optimizer models support the meta-learning RL optimizer environment.
"""
|
|
| from pydantic import Field |
|
|
| from openenv.core.env_server.types import Action, Observation |
|
|
|
|
class MyAction(Action):
    """Action for the My Env environment - just a message to echo."""

    # Required (no default): the text the environment is asked to echo back.
    message: str = Field(..., description="Message to echo back")
|
|
|
|
class MyObservation(Observation):
    """Observation from the My Env environment - the echoed message."""

    # Both fields declared here have defaults ("" and 0), so neither has to be
    # supplied explicitly when an observation is constructed.
    echoed_message: str = Field(default="", description="The echoed message")
    message_length: int = Field(
        default=0,
        description="Length of the echoed message",
    )
|
|
|
|
| |
|
|
|
|
class MetaOptimizerAction(Action):
    """Action for the meta-optimizer environment: control optimizer hyperparameters per step."""

    # All four fields are required (`...` means "no default"). The `ge` / `le`
    # arguments are inclusive bounds checked at validation time.

    # Bounded on both sides: 1e-4 <= lr_scale <= 1.0.
    lr_scale: float = Field(
        ...,
        ge=1e-4,
        le=1.0,
        description="Learning rate scale for this step (e.g. 1e-4 to 1.0)",
    )

    # Bounded on both sides: 0.0 <= momentum_coef <= 1.0.
    momentum_coef: float = Field(
        ...,
        ge=0.0,
        le=1.0,
        description="Momentum coefficient (0 = no momentum, 1 = full carry)",
    )

    # Non-negative with no upper bound; per the description, 0 disables clipping.
    grad_clip_threshold: float = Field(
        ...,
        ge=0.0,
        description="Gradient clipping threshold (0 = no clipping)",
    )

    # Non-negative with no upper bound; per the description, 0 disables decay.
    weight_decay_this_step: float = Field(
        ...,
        ge=0.0,
        description="Weight decay (L2) scale for this step (0 = no weight decay)",
    )
|
|
|
|
class MetaOptimizerObservation(Observation):
    """Observation from the meta-optimizer environment: loss, step, and optional grad norm."""

    # Required on every observation (no defaults).
    loss: float = Field(..., description="Current loss after last update")
    step_count: int = Field(..., description="Current step in the episode")

    # Optional metrics: each defaults to None when the value is not provided.
    grad_norm: float | None = Field(
        default=None,
        description="Global gradient norm before last update (if available)",
    )
    steps_to_threshold: int | None = Field(
        default=None,
        description="Step at which loss first reached threshold (None if not yet reached)",
    )
    perplexity: float | None = Field(
        default=None,
        description="exp(loss) for language modeling (None for regression)",
    )
| |