File size: 1,265 Bytes
139d3d1
 
f577d1f
 
139d3d1
 
f577d1f
139d3d1
 
f577d1f
139d3d1
 
 
 
 
 
f577d1f
9737348
 
 
 
 
139d3d1
 
 
 
 
9737348
139d3d1
 
 
 
 
 
 
 
 
f577d1f
139d3d1
 
 
 
 
 
9737348
 
ed81b09
725414c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from __future__ import annotations

from openenv.core.env_server import Action, Observation, State
from pydantic import field_validator


class DeceitObservation(Observation):
    """What the agent sees each step."""

    # NOTE(review): the `[]` default on `context` is only safe if the base class
    # copies defaults per instance (pydantic models do) — confirm that
    # `Observation` is a pydantic model and not a plain class/dataclass.

    # Question text for this step; empty string when not supplied.
    question: str = ""
    # Context strings accompanying the question; empty list when not supplied.
    context: list[str] = []
    # Presumably the zero-based index of the current turn — TODO confirm.
    turn_index: int = 0
    # Presumably the turn budget for the episode (defaults to 3) — TODO confirm.
    max_turns: int = 3
    # Presumably the difficulty/curriculum level (defaults to 1) — TODO confirm.
    level: int = 1


class DeceitAction(Action):
    """What the agent produces each step.

    Set is_final=True to commit an answer and end the episode.
    Set is_final=False to think for another turn (costs a -0.05 step penalty).
    """

    # Required: the only field without a default.
    reasoning: str
    # Answer text; empty string unless the agent supplies one.
    answer: str = ""
    # Must lie in [0.0, 1.0]; enforced by `confidence_in_range` below.
    confidence: float = 0.5
    # Presumably marks that the agent declines to answer — TODO confirm.
    abstain: bool = False
    # See the class docstring for the meaning of True/False.
    is_final: bool = False

    @field_validator("confidence")
    @classmethod
    def confidence_in_range(cls, v: float) -> float:
        """Return *v* unchanged when it is within [0.0, 1.0], else raise.

        Raises:
            ValueError: if *v* falls outside the closed interval [0.0, 1.0].
        """
        # Keep the chained comparison: it is False for NaN as well, so NaN is
        # rejected rather than slipping through.
        if 0.0 <= v <= 1.0:
            return v
        raise ValueError(f"confidence must be between 0.0 and 1.0, got {v}")


class DeceitState(State):
    """What the environment tracks internally — never sent to agent."""

    # NOTE(review): the `[]` defaults below are only safe if the base class
    # copies defaults per instance (pydantic models do) — confirm that `State`
    # is a pydantic model and not a plain class/dataclass.

    # Presumably the difficulty/curriculum level (defaults to 1) — TODO confirm.
    level: int = 1
    # Presumably the reference answer for the current question — TODO confirm.
    ground_truth: str = ""
    # Identifier of the current question; empty string when unset.
    current_question_id: str = ""
    # Float rewards collected for the episode; starts empty.
    # Presumably one entry per step — verify against the environment code.
    episode_rewards: list[float] = []
    # Presumably the agent's reasoning strings from earlier turns — TODO confirm.
    prior_reasoning: list[str] = []
    # Presumably the turn budget for the episode (defaults to 3) — TODO confirm.
    max_turns: int = 3
    # Boolean flag, False by default; what "pressure" refers to is not visible
    # in this file — see the environment implementation.
    pressure_shown: bool = False
    # Presumably the context strings the episode started with — TODO confirm.
    initial_context: list[str] = []