File size: 1,265 Bytes
139d3d1 f577d1f 139d3d1 f577d1f 139d3d1 f577d1f 139d3d1 f577d1f 9737348 139d3d1 9737348 139d3d1 f577d1f 139d3d1 9737348 ed81b09 725414c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 | from __future__ import annotations
from openenv.core.env_server import Action, Observation, State
from pydantic import field_validator
class DeceitObservation(Observation):
    """What the agent sees each step."""

    # Every field has a default, so an observation can be built with no
    # arguments.  Field order is kept as declared because it determines the
    # order of serialized output.

    # Question text; empty string until populated.
    question: str = ""

    # List of strings, empty by default.
    # NOTE(review): a literal ``[]`` default is only safe if the base class
    # copies defaults per instance (pydantic models do) — confirm that
    # ``Observation`` is a pydantic model.
    context: list[str] = []

    # Presumably the zero-based index of the current turn — TODO confirm
    # against the environment's step logic.
    turn_index: int = 0

    # Defaults to 3; presumably the turn budget for one episode — confirm.
    max_turns: int = 3

    # Defaults to 1; presumably a difficulty / curriculum level — confirm.
    level: int = 1
class DeceitAction(Action):
    """What the agent produces each step.
    Set is_final=True to commit an answer and end the episode.
    Set is_final=False to think for another turn (costs a -0.05 step penalty).
    """

    # The only field without a default: callers must always supply it.
    reasoning: str

    # Answer text; empty string by default.
    answer: str = ""

    # Must lie in the closed interval [0.0, 1.0]; enforced by the validator
    # below.  Defaults to 0.5.
    confidence: float = 0.5

    # Defaults to False; presumably marks that the agent declines to
    # answer — TODO confirm how the environment scores it.
    abstain: bool = False

    # Defaults to False; see the class docstring for its meaning.
    is_final: bool = False

    @field_validator("confidence")
    @classmethod
    def confidence_in_range(cls, v: float) -> float:
        """Return *v* unchanged when it lies in [0.0, 1.0].

        Raises:
            ValueError: if *v* is outside the closed interval [0.0, 1.0].
        """
        # The chained comparison is False for NaN as well, so NaN falls
        # through to the error below just like any out-of-range number.
        if 0.0 <= v <= 1.0:
            return v
        raise ValueError(f"confidence must be between 0.0 and 1.0, got {v}")
class DeceitState(State):
    """What the environment tracks internally — never sent to agent."""

    # Every field has a default, so a state object can be built with no
    # arguments.  Field order is kept as declared because it determines the
    # order of serialized output.
    # NOTE(review): the literal ``[]`` defaults below are only safe if the
    # base class copies defaults per instance (pydantic models do) — confirm
    # that ``State`` is a pydantic model.

    # Defaults to 1; presumably a difficulty / curriculum level — confirm.
    level: int = 1

    # Reference answer string; empty until populated.
    ground_truth: str = ""

    # Identifier string for the active question; empty until populated.
    current_question_id: str = ""

    # List of floats, empty by default; presumably the per-step rewards
    # collected during the episode — TODO confirm.
    episode_rewards: list[float] = []

    # List of strings, empty by default; presumably the reasoning text from
    # earlier turns — TODO confirm.
    prior_reasoning: list[str] = []

    # Defaults to 3; presumably the turn budget for one episode — confirm.
    max_turns: int = 3

    # Boolean flag, False by default.
    # NOTE(review): its exact meaning is not visible in this file.
    pressure_shown: bool = False

    # List of strings, empty by default; presumably the context the episode
    # started with — TODO confirm.
    initial_context: list[str] = []
|