File size: 3,202 Bytes
18feac5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
from typing import Dict, List, Literal, Optional

from openenv.core.env_server import Action, Observation, State
from pydantic import Field


class ToolUseAction(Action):
    # A single request from the agent: the action to run plus the optional
    # argument that particular action consumes. Every argument defaults to
    # None, and nothing declared in this class checks that the argument
    # matching ``action_type`` was actually supplied.
    #
    # NOTE(review): documented with comments rather than a docstring because a
    # class docstring would presumably be emitted as the model's schema
    # description -- confirm before converting.

    # Closed set of action names; any other value fails the Literal check.
    action_type: Literal[
        "review_ticket", "inspect_artifact", "search_policy",
        "draft_reply", "submit_resolution",
    ] = Field(
        ...,
        description="The action the agent wants to execute.",
    )

    # Argument for inspect_artifact.
    artifact_id: Optional[str] = Field(
        description="Artifact identifier for inspect_artifact, such as order or risk_log.",
        default=None,
    )

    # Argument for search_policy.
    query: Optional[str] = Field(
        description="Policy name or search query for search_policy.",
        default=None,
    )

    # Argument for draft_reply.
    message: Optional[str] = Field(
        description="Customer-facing reply draft used with draft_reply.",
        default=None,
    )

    # Argument for submit_resolution.
    resolution_code: Optional[str] = Field(
        description="Final resolution code used with submit_resolution.",
        default=None,
    )


class ToolUseObservation(Observation):
    # What the agent is shown for the active task: the task description, the
    # actions and resolution codes it may use, the evidence gathered so far,
    # the outcome of its last action, and its remaining budget and score.
    #
    # NOTE(review): documented with comments rather than a docstring because a
    # class docstring would presumably be emitted as the model's schema
    # description -- confirm before converting.

    # --- Task description (all required) ---
    task_id: str = Field(
        ...,
        description="Deterministic task identifier.",
    )
    difficulty: Literal["easy", "medium", "hard"] = Field(
        ...,
        description="Difficulty tier for the active task.",
    )
    objective: str = Field(
        ...,
        description="Concrete task objective for the agent.",
    )
    customer_message: str = Field(
        ...,
        description="The raw customer support ticket.",
    )
    workspace_summary: str = Field(
        ...,
        description="Short summary of known evidence and remaining work.",
    )

    # --- Options and progress (each instance gets its own empty list) ---
    available_actions: List[str] = Field(
        description="Available environment actions.",
        default_factory=list,
    )
    available_resolution_codes: List[str] = Field(
        description="Resolution codes accepted by submit_resolution.",
        default_factory=list,
    )
    collected_evidence: List[str] = Field(
        description="Evidence keys collected so far, such as ticket or payment.",
        default_factory=list,
    )

    # --- Feedback on the most recent action (None when there is none) ---
    last_tool_result: Optional[str] = Field(
        description="Most recent tool or grader output shown to the agent.",
        default=None,
    )
    last_action_error: Optional[str] = Field(
        description="Validation error for the last action, if any.",
        default=None,
    )

    # --- Budget and score ---
    remaining_steps: int = Field(
        ...,
        description="How many steps are left before the episode ends.",
    )
    # The [0, 1] range is stated in the description only; no bound is
    # enforced by this field declaration.
    current_score: float = Field(
        description="Current deterministic grader score in the [0, 1] range.",
        default=0.0,
    )


class ToolUseState(State):
    # Episode state for the tool-use environment. Every field declared here
    # has a default, so (assuming the State base adds no required fields) an
    # instance can be built with no arguments and filled in afterwards.
    # Field meanings below are inferred from names only -- TODO confirm
    # against the environment implementation.

    # Identity of the active task. Note that difficulty is a plain str here
    # (default ""), not restricted to a fixed set of tier names.
    task_id: str = ""
    task_name: str = ""
    difficulty: str = ""
    objective: str = ""
    # Scoring; presumably a running reward total and the grader's final score.
    cumulative_reward: float = 0.0
    final_score: float = 0.0
    # None until set; presumably written by draft_reply / submit_resolution.
    drafted_reply: Optional[str] = None
    resolution_code: Optional[str] = None
    # Presumably the answer the grader compares resolution_code against.
    expected_resolution_code: str = ""
    # Evidence bookkeeping; default_factory gives each instance its own list.
    required_evidence: List[str] = Field(default_factory=list)
    collected_evidence: List[str] = Field(default_factory=list)
    # Presumably a log of actions taken and a counter of repeated actions.
    action_history: List[str] = Field(default_factory=list)
    repeat_action_count: int = 0
    # None when there is no error recorded for the last action.
    last_action_error: Optional[str] = None
    # String-to-string lookups; what the keys and values hold is not shown
    # in this file. Each instance gets its own dict.
    known_artifacts: Dict[str, str] = Field(default_factory=dict)
    known_policies: Dict[str, str] = Field(default_factory=dict)