Jayant-Kernel
fix: persist initial context (distractors/pressure) across all episode turns
725414c unverified | from __future__ import annotations | |
| from openenv.core.env_server import Action, Observation, State | |
| from pydantic import field_validator | |
| class DeceitObservation(Observation): | |
| """What the agent sees each step.""" | |
| question: str = "" | |
| context: list[str] = [] | |
| turn_index: int = 0 | |
| max_turns: int = 3 | |
| level: int = 1 | |
| class DeceitAction(Action): | |
| """What the agent produces each step. | |
| Set is_final=True to commit an answer and end the episode. | |
| Set is_final=False to think for another turn (costs a -0.05 step penalty). | |
| """ | |
| reasoning: str | |
| answer: str = "" | |
| confidence: float = 0.5 | |
| abstain: bool = False | |
| is_final: bool = False | |
| def confidence_in_range(cls, v: float) -> float: | |
| if not 0.0 <= v <= 1.0: | |
| raise ValueError(f"confidence must be between 0.0 and 1.0, got {v}") | |
| return v | |
| class DeceitState(State): | |
| """What the environment tracks internally — never sent to agent.""" | |
| level: int = 1 | |
| ground_truth: str = "" | |
| current_question_id: str = "" | |
| episode_rewards: list[float] = [] | |
| prior_reasoning: list[str] = [] | |
| max_turns: int = 3 | |
| pressure_shown: bool = False | |
| initial_context: list[str] = [] | |