Spaces:
Sleeping
Sleeping
File size: 2,832 Bytes
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""AxiomForgeAI Math RL Environment Client."""
from typing import Any, Dict, Optional
from openenv.core import EnvClient
from openenv.core.client_types import StepResult
from openenv.core.env_server.types import State
from .models import AxiomforgeaiAction, AxiomforgeaiObservation
class AxiomforgeaiEnv(
    EnvClient[AxiomforgeaiAction, AxiomforgeaiObservation, State]
):
    """
    Client for the AxiomForgeAI math RL environment.

    Maintains a persistent WebSocket connection to the environment server.
    Each client instance gets its own session with independent episode state.

    Episode flow::

        with AxiomforgeaiEnv(base_url="http://localhost:8000") as env:
            # 1. Reset — receive a math question
            result = env.reset()
            question = result.observation.question

            # 2. Step — submit a solution, receive reward + feedback
            solution = "Step 1: ... Final Answer: 42"
            result = env.step(AxiomforgeaiAction(solution=solution))
            print(result.reward, result.observation.feedback)

    Example with Docker::

        client = AxiomforgeaiEnv.from_docker_image("axiomforgeai-env:latest")
        try:
            result = client.reset()
            result = client.step(AxiomforgeaiAction(solution="Final Answer: 17"))
        finally:
            client.close()
    """

    def _step_payload(self, action: AxiomforgeaiAction) -> Dict[str, Any]:
        """Convert AxiomforgeaiAction to JSON payload for the step endpoint.

        Args:
            action: The action whose ``solution`` attribute is sent.

        Returns:
            A dict with the single key ``"solution"``.
        """
        return {"solution": action.solution}

    def _parse_result(
        self, payload: Dict[str, Any]
    ) -> StepResult[AxiomforgeaiObservation]:
        """Parse the server's step response into a StepResult.

        A field that is missing from the payload and a field that is present
        but JSON ``null`` are treated the same way: both fall back to the
        default listed below, so a sparse or null-padded response does not
        raise while being parsed.

        Defaults: ``question``/``topic``/``feedback`` -> ``""``,
        ``difficulty`` -> ``0.5``, ``metadata`` -> ``{}``, ``done`` ->
        ``False``. ``reward`` is passed through unchanged (``None`` if absent).

        Args:
            payload: Decoded response dict; reads the keys ``"observation"``,
                ``"reward"`` and ``"done"``.

        Returns:
            A StepResult wrapping the rebuilt observation, with ``reward`` and
            ``done`` mirrored from the top level of the payload.
        """
        # ``or {}`` instead of a ``.get`` default: a ``.get`` default only
        # applies when the key is absent, not when its value is null.
        obs_data: Dict[str, Any] = payload.get("observation") or {}

        # float(None) raises TypeError, so resolve the default before converting.
        raw_difficulty = obs_data.get("difficulty")
        difficulty = 0.5 if raw_difficulty is None else float(raw_difficulty)

        reward = payload.get("reward")
        done = bool(payload.get("done"))

        observation = AxiomforgeaiObservation(
            question=obs_data.get("question") or "",
            topic=obs_data.get("topic") or "",
            difficulty=difficulty,
            feedback=obs_data.get("feedback") or "",
            done=done,
            reward=reward,
            # Hand the observation a dict even when the server sends no metadata.
            metadata=obs_data.get("metadata") or {},
        )
        return StepResult(
            observation=observation,
            reward=reward,
            done=done,
        )

    def _parse_state(self, payload: Dict[str, Any]) -> State:
        """Parse the server's state response into a State object.

        Args:
            payload: Decoded response dict; reads ``"episode_id"`` and
                ``"step_count"``.

        Returns:
            A State with ``episode_id`` passed through (``None`` if absent) and
            ``step_count`` defaulting to ``0`` when missing or null.
        """
        return State(
            episode_id=payload.get("episode_id"),
            step_count=payload.get("step_count") or 0,
        )
|