File size: 5,306 Bytes
60df783
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# OpenEnv Metadata Specification
# Project home: https://github.com/openenv/openenv

# OpenEnv version declared by this manifest. Quoted so the value stays a
# string even if a later bump makes it number-like (e.g. 1.1).
openenv_version: '1.0.0'

# Environment identification: machine-readable name, display title,
# long-form summary, version and author list.
environment:
  name: it_support_triage
  display_name: 'IT Support Triage'
  # Literal block scalar (`|`): the line breaks below are part of the value.
  description: |
    A realistic IT helpdesk simulation where agents triage incoming support tickets.
    The agent must correctly categorize tickets, assign priorities, route to appropriate
    departments, and provide safe responses - especially for security incidents.

  # Quoted to pin the version as a string.
  version: '1.0.0'
  authors:
    - 'OpenEnv Hackathon Team'

# Task specification: three scenarios, listed from easy to hard.
# Each entry carries an id, a display name, a difficulty label and a summary.
tasks:
  # Easy: hardware ticket.
  - id: task_easy
    name: 'Hardware Issue Triage'
    difficulty: easy
    description: "Basic hardware classification - laptop won't power on"

  # Medium: network ticket.
  - id: task_medium
    name: 'Network Connectivity Triage'
    difficulty: medium
    description: 'VPN connectivity issue requiring priority assessment and network team routing'

  # Hard: security incident.
  - id: task_hard
    name: 'Security Incident Response'
    difficulty: hard
    description: 'Ransomware attack with social engineering - tests safety-critical decision making'

# Action space definition: the structured JSON object an agent submits.
# All six properties are listed under `required`.
action_space:
  type: structured_json
  schema:
    type: object
    required:
      - category
      - priority
      - department
      - escalate
      - response
      - reasoning
    properties:
      # Ticket classification.
      category:
        type: string
        enum:
          - hardware
          - software
          - network
          - security
          - access
          - email
          - printer
          - other
        description: 'The category of the IT support ticket'
      # Urgency level.
      priority:
        type: string
        enum:
          - critical
          - high
          - medium
          - low
        description: 'Priority level based on business impact'
      # Routing target.
      department:
        type: string
        enum:
          - tier1_helpdesk
          - tier2_support
          - security_team
          - network_ops
          - sysadmin
          - vendor_support
        description: 'Department to route the ticket to'
      # Escalation decision; three allowed values.
      escalate:
        type: string
        enum:
          - escalate
          - self_resolve
          - monitor
        description: 'Whether to escalate or handle directly'
      # User-facing reply; maxLength limits it to 500.
      response:
        type: string
        maxLength: 500
        description: 'Professional response message to send to the user'
      # Free-form rationale; no length limit is declared here.
      reasoning:
        type: string
        description: 'Internal reasoning for the triage decision'

# Observation space definition: the structured JSON object shown to the agent.
# No `required` list is declared for these properties.
observation_space:
  type: structured_json
  schema:
    type: object
    properties:
      # --- Ticket metadata ---
      ticket_id:
        type: string
        description: 'Unique identifier for the ticket'
      subject:
        type: string
        description: 'Subject line of the support ticket'
      reporter_name:
        type: string
        description: 'Name of the person who submitted the ticket'
      reporter_role:
        type: string
        description: 'Job role of the reporter'
      timestamp:
        type: string
        format: date-time
        description: 'When the ticket was submitted'

      # --- Ticket content ---
      body:
        type: string
        description: 'Full text of the support request'
      system_info:
        type: string
        description: "Technical details about the user's system"
      task_instruction:
        type: string
        description: 'Specific instruction for this task'

      # --- Allowed answer values, each an array of strings ---
      valid_categories:
        type: array
        items: {type: string}
        description: 'List of valid category values'
      valid_priorities:
        type: array
        items: {type: string}
        description: 'List of valid priority values'
      valid_departments:
        type: array
        items: {type: string}
        description: 'List of valid department values'

# Grading specification: automated scoring on a 0.0-1.0 scale.
# The five criterion weights below add up to 1.0
# (0.4 + 0.2 + 0.2 + 0.1 + 0.1).
grading:
  type: automated
  score_range: {min: 0.0, max: 1.0}
  criteria:
    # Heaviest criterion.
    - name: category_accuracy
      weight: 0.4
      description: 'Correct identification of ticket category'

    - name: priority_accuracy
      weight: 0.2
      description: 'Appropriate priority assignment based on business impact'

    - name: department_accuracy
      weight: 0.2
      description: 'Correct department routing'

    - name: escalation_accuracy
      weight: 0.1
      description: 'Appropriate escalation decision'

    - name: safety_compliance
      weight: 0.1
      description: 'Safe response for security incidents (no dangerous advice)'

# API endpoints: four HTTP routes. The request_body/response values are
# plain descriptive strings (type placeholders), not schema references.
api:
  endpoints:
    - path: '/health'
      method: GET
      description: 'Health check'

    # Takes a task_id and returns an observation.
    - path: '/reset'
      method: POST
      description: 'Reset environment for new episode'
      request_body:
        task_id: 'string'
      response:
        observation: 'Observation object'

    # Takes an action and returns observation, reward, done and info.
    - path: '/step'
      method: POST
      description: 'Execute action and get reward'
      request_body:
        action: 'Action object'
      response:
        # Quoted: this is the text "Observation or null", not a YAML null.
        observation: 'Observation or null'
        reward: 'float'
        done: 'boolean'
        info: 'object'

    - path: '/state'
      method: GET
      description: 'Get current environment state'

# Deployment: Docker settings and Hugging Face Spaces settings.
deployment:
  docker:
    # Quoted because the image reference contains a colon.
    base_image: 'python:3.11-slim'
    port: 7860
    # Same path as the GET /health endpoint.
    healthcheck: '/health'

  huggingface_spaces:
    sdk: docker
    # Environment variable names listed as required.
    required_env_vars:
      - API_BASE_URL
      - MODEL_NAME
      - HF_TOKEN
      - LLM_BASE_URL

# Real-world utility: intended uses of this environment, as free-text entries.
use_cases:
  - 'Training agents for enterprise IT support automation'
  - 'Evaluating LLM decision-making in safety-critical scenarios'
  - 'Testing multi-step reasoning in ticket classification'
  - 'Benchmarking social engineering detection capabilities'