# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""Data models for the AutoMathReasoner Environment.

Defines the action submitted to the environment and the observation it
returns, both built on the OpenEnv ``Action`` / ``Observation`` base types.
"""

from typing import Any, Dict, List

from pydantic import Field

from openenv.core.env_server.types import Action, Observation


class AutomathreasonerAction(Action):
    """Action for the AutoMathReasoner environment - containing reasoning and final answer."""

    # Free-form working that leads up to the answer; empty when omitted.
    reasoning: str = Field(
        default="",
        description="The step-by-step mathematical reasoning.",
    )

    # The answer itself, carried as text; empty when omitted.
    final_answer: str = Field(
        default="",
        description="The final numerical or algebraic answer.",
    )


class AutomathreasonerObservation(Observation):
    """Observation from the AutoMathReasoner environment."""

    # Statement of the problem presented to the agent.
    problem_text: str = Field(
        default="",
        description="The text of the generated math problem.",
    )

    # Difficulty of the current problem; defaults to 1.0.
    difficulty_level: float = Field(
        default=1.0,
        description="The current difficulty level of the problem.",
    )

    # default_factory gives every instance its own fresh list.
    # NOTE(review): the "last 3 attempts" limit is stated only in the
    # description; nothing in this model enforces it.
    history: List[Dict[str, Any]] = Field(
        default_factory=list,
        description="History of the last 3 attempts for this problem.",
    )

    # Required by OpenEnv base class
    reward: float = Field(
        default=0.0,
        description="Reward received from the previous action.",
    )
    done: bool = Field(
        default=False,
        description="Whether the episode has ended.",
    )