modelbuilderhq committed on
Commit
08bf384
·
verified ·
1 Parent(s): 4f129c9

Delete folder supportdesk_env with huggingface_hub

Browse files
supportdesk_env/__init__.py DELETED
@@ -1,46 +0,0 @@
1
- """SupportDesk OpenEnv environment package."""
2
-
3
- from supportdesk_env.graders import GradeBreakdown, grade_case, grade_task_id
4
- from supportdesk_env.models import (
5
- ActionHistoryEntry,
6
- KnowledgeSnippet,
7
- SupportCaseProgress,
8
- SupportDeskAction,
9
- SupportDeskObservation,
10
- SupportDeskState,
11
- SupportTicket,
12
- )
13
- from supportdesk_env.policies import default_note, default_reply, heuristic_action
14
- from supportdesk_env.tasks import TASKS, SupportTaskSpec, get_task, list_task_ids
15
-
16
- try:
17
- from supportdesk_env.client import SupportDeskEnv
18
- except ImportError: # pragma: no cover - local unit tests can run without openenv-core
19
- SupportDeskEnv = None # type: ignore[assignment]
20
-
21
- try:
22
- from supportdesk_env.server.supportdesk_environment import SupportDeskEnvironment
23
- except ImportError: # pragma: no cover - guarded for partial local setups
24
- SupportDeskEnvironment = None # type: ignore[assignment]
25
-
26
- __all__ = [
27
- "ActionHistoryEntry",
28
- "GradeBreakdown",
29
- "KnowledgeSnippet",
30
- "SupportCaseProgress",
31
- "SupportDeskAction",
32
- "SupportDeskEnv",
33
- "SupportDeskEnvironment",
34
- "SupportDeskObservation",
35
- "SupportDeskState",
36
- "SupportTaskSpec",
37
- "SupportTicket",
38
- "TASKS",
39
- "default_note",
40
- "default_reply",
41
- "get_task",
42
- "grade_case",
43
- "grade_task_id",
44
- "heuristic_action",
45
- "list_task_ids",
46
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
supportdesk_env/client.py DELETED
@@ -1,38 +0,0 @@
1
- """HTTP client for interacting with a deployed SupportDesk environment."""
2
-
3
- from __future__ import annotations
4
-
5
- from supportdesk_env.models import SupportDeskAction, SupportDeskObservation, SupportDeskState
6
- from supportdesk_env.openenv_compat import EnvClient, StepResult
7
-
8
-
9
- def _validate(model_cls, payload):
10
- if hasattr(model_cls, "model_validate"):
11
- return model_cls.model_validate(payload)
12
- return model_cls(**payload) # pragma: no cover - pydantic v1 fallback
13
-
14
-
15
- class SupportDeskEnv(EnvClient[SupportDeskAction, SupportDeskObservation, SupportDeskState]):
16
- """Typed client for a locally running or deployed OpenEnv server."""
17
-
18
- def _step_payload(self, action: SupportDeskAction) -> dict:
19
- """Convert a typed action into the JSON payload expected by the server."""
20
-
21
- if hasattr(action, "model_dump"):
22
- return action.model_dump()
23
- return action.dict()
24
-
25
- def _parse_state(self, payload) -> SupportDeskState:
26
- return _validate(SupportDeskState, payload)
27
-
28
- def _parse_reset(self, payload) -> SupportDeskObservation:
29
- return _validate(SupportDeskObservation, payload)
30
-
31
- def _parse_result(self, payload) -> StepResult[SupportDeskObservation]:
32
- observation = _validate(SupportDeskObservation, payload["observation"])
33
- # OpenEnv StepResult only accepts observation/reward/done in this runtime.
34
- return StepResult(
35
- observation=observation,
36
- reward=payload["reward"],
37
- done=payload["done"],
38
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
supportdesk_env/graders.py DELETED
@@ -1,167 +0,0 @@
1
- """Deterministic graders and reward helpers for SupportDesk."""
2
-
3
- from __future__ import annotations
4
-
5
- import re
6
- from dataclasses import dataclass
7
-
8
- from supportdesk_env.models import SupportCaseProgress
9
- from supportdesk_env.tasks import SupportTaskSpec, get_task
10
-
11
- STRICT_SCORE_EPSILON = 0.01
12
-
13
-
14
- @dataclass(frozen=True)
15
- class GradeBreakdown:
16
- """A scored view of how close a case is to the gold solution."""
17
-
18
- total_score: float
19
- queue_score: float
20
- priority_score: float
21
- issue_type_score: float
22
- requested_fields_score: float
23
- reply_score: float
24
- note_score: float
25
- status_score: float
26
- resolution_score: float
27
- completed_milestones: tuple[str, ...]
28
-
29
-
30
- def _normalize(text: str | None) -> str:
31
- if not text:
32
- return ""
33
- normalized = text.lower().replace("-", " ")
34
- return re.sub(r"[^a-z0-9\s]", " ", normalized)
35
-
36
-
37
- def _marker_group_score(text: str | None, marker_groups: tuple[tuple[str, ...], ...]) -> float:
38
- if not marker_groups:
39
- return 1.0
40
-
41
- normalized = _normalize(text)
42
- if not normalized:
43
- return 0.0
44
-
45
- matches = 0
46
- for group in marker_groups:
47
- if any(_normalize(marker) in normalized for marker in group):
48
- matches += 1
49
- return matches / len(marker_groups)
50
-
51
-
52
- def _requested_fields_score(case: SupportCaseProgress, task: SupportTaskSpec) -> float:
53
- required = set(task.required_requested_fields)
54
- requested = set(case.requested_fields)
55
-
56
- if not required:
57
- return 1.0 if not requested else 0.0
58
- if not requested:
59
- return 0.0
60
-
61
- matched = len(required.intersection(requested))
62
- extras = len(requested.difference(required))
63
- raw = matched / len(required)
64
- penalty = min(0.25, extras * 0.05)
65
- return max(0.0, raw - penalty)
66
-
67
-
68
- def _reply_penalty(case: SupportCaseProgress, task: SupportTaskSpec) -> float:
69
- text = _normalize(case.reply)
70
- if not text:
71
- return 0.0
72
- return 0.0 if not any(_normalize(marker) in text for marker in task.forbidden_reply_markers) else 0.5
73
-
74
-
75
- def _strict_open_unit_interval(score: float) -> float:
76
- """Keep final task scores strictly within (0, 1) for evaluator compatibility."""
77
-
78
- return min(1.0 - STRICT_SCORE_EPSILON, max(STRICT_SCORE_EPSILON, score))
79
-
80
-
81
- def grade_case(task: SupportTaskSpec, case: SupportCaseProgress) -> GradeBreakdown:
82
- """Score a case deterministically with total_score strictly inside (0, 1)."""
83
-
84
- queue_score = 1.0 if case.queue == task.gold_queue else 0.0
85
- priority_score = 1.0 if case.priority == task.gold_priority else 0.0
86
- issue_type_score = 1.0 if case.issue_type == task.gold_issue_type else 0.0
87
- requested_fields_score = _requested_fields_score(case, task)
88
- reply_score = max(0.0, _marker_group_score(case.reply, task.required_reply_markers) - _reply_penalty(case, task))
89
- note_score = _marker_group_score(case.internal_note, task.required_note_markers)
90
- status_score = 1.0 if case.status == task.gold_status else 0.0
91
- resolution_score = 1.0 if case.resolution_code == task.gold_resolution_code else 0.0
92
-
93
- weighted_total = (
94
- queue_score * 0.15
95
- + priority_score * 0.10
96
- + issue_type_score * 0.10
97
- + requested_fields_score * 0.15
98
- + reply_score * 0.25
99
- + note_score * 0.10
100
- + status_score * 0.10
101
- + resolution_score * 0.05
102
- )
103
-
104
- milestones: list[str] = []
105
- if queue_score:
106
- milestones.append("queue")
107
- if priority_score:
108
- milestones.append("priority")
109
- if issue_type_score:
110
- milestones.append("issue_type")
111
- if requested_fields_score >= 0.99:
112
- milestones.append("requested_fields")
113
- if reply_score >= 0.99:
114
- milestones.append("reply")
115
- if note_score >= 0.99:
116
- milestones.append("internal_note")
117
- if status_score:
118
- milestones.append("status")
119
- if resolution_score:
120
- milestones.append("resolution_code")
121
-
122
- return GradeBreakdown(
123
- total_score=round(_strict_open_unit_interval(weighted_total), 4),
124
- queue_score=queue_score,
125
- priority_score=priority_score,
126
- issue_type_score=issue_type_score,
127
- requested_fields_score=round(requested_fields_score, 4),
128
- reply_score=round(reply_score, 4),
129
- note_score=round(note_score, 4),
130
- status_score=status_score,
131
- resolution_score=resolution_score,
132
- completed_milestones=tuple(milestones),
133
- )
134
-
135
-
136
- def grade_task_id(task_id: str, case: SupportCaseProgress) -> GradeBreakdown:
137
- """Convenience wrapper used by tests and evaluation scripts."""
138
-
139
- return grade_case(get_task(task_id), case)
140
-
141
-
142
- class _TaskSpecificGrader:
143
- """Importable task-specific grader wrapper for validator task discovery."""
144
-
145
- task_id: str = ""
146
-
147
- def grade(self, case: SupportCaseProgress) -> float:
148
- return grade_task_id(self.task_id, case).total_score
149
-
150
- def __call__(self, case: SupportCaseProgress) -> float:
151
- return self.grade(case)
152
-
153
-
154
- class BillingRefundEasyGrader(_TaskSpecificGrader):
155
- task_id = "billing_refund_easy"
156
-
157
-
158
- class AccountTakeoverMediumGrader(_TaskSpecificGrader):
159
- task_id = "account_takeover_medium"
160
-
161
-
162
- class ApiIncidentHardGrader(_TaskSpecificGrader):
163
- task_id = "api_incident_hard"
164
-
165
-
166
- class RegulatedExportExceptionHardGrader(_TaskSpecificGrader):
167
- task_id = "regulated_export_exception_hard"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
supportdesk_env/models.py DELETED
@@ -1,122 +0,0 @@
1
- """Typed models for the SupportDesk OpenEnv environment."""
2
-
3
- from __future__ import annotations
4
-
5
- from typing import Literal
6
-
7
- from pydantic import BaseModel, Field
8
-
9
- from supportdesk_env.openenv_compat import Action, Observation, State
10
-
11
-
12
- class KnowledgeSnippet(BaseModel):
13
- """A policy or runbook excerpt the agent can use during triage."""
14
-
15
- article_id: str
16
- title: str
17
- content: str
18
-
19
-
20
- class SupportTicket(BaseModel):
21
- """Static task input representing the inbound support ticket."""
22
-
23
- customer_name: str
24
- customer_tier: Literal["free", "pro", "enterprise"]
25
- company: str
26
- subject: str
27
- body: str
28
- region: str
29
- affected_users: int | None = None
30
- sla_minutes_remaining: int | None = None
31
- business_impact: str | None = None
32
- secondary_concerns: list[str] = Field(default_factory=list)
33
- attachments: list[str] = Field(default_factory=list)
34
-
35
-
36
- class ActionHistoryEntry(BaseModel):
37
- """A concise trace entry used in observations and state dumps."""
38
-
39
- step: int
40
- operation: str
41
- summary: str
42
- reward_delta: float = 0.0
43
-
44
-
45
- class CustomerFollowUp(BaseModel):
46
- """A scripted customer response that arrives after a request for more information."""
47
-
48
- status: Literal["none", "pending", "partial", "complete", "incorrect"] = "none"
49
- message: str | None = None
50
- provided_fields: list[str] = Field(default_factory=list)
51
- wrong_fields: list[str] = Field(default_factory=list)
52
-
53
-
54
- class SupportCaseProgress(BaseModel):
55
- """Mutable case state that graders score against."""
56
-
57
- queue: str | None = None
58
- priority: str | None = None
59
- issue_type: str | None = None
60
- status: str = "new"
61
- resolution_code: str | None = None
62
- requested_fields: list[str] = Field(default_factory=list)
63
- reply: str | None = None
64
- internal_note: str | None = None
65
- customer_follow_up: CustomerFollowUp = Field(default_factory=CustomerFollowUp)
66
-
67
-
68
- class SupportDeskAction(Action):
69
- """One structured action the agent can take at each step."""
70
-
71
- operation: Literal["classify", "request_info", "draft_reply", "add_internal_note", "submit", "wait"]
72
- queue: str | None = None
73
- priority: str | None = None
74
- issue_type: str | None = None
75
- status: str | None = None
76
- resolution_code: str | None = None
77
- requested_fields: list[str] = Field(default_factory=list)
78
- reply: str | None = None
79
- internal_note: str | None = None
80
-
81
-
82
- class SupportDeskObservation(Observation):
83
- """Observation emitted to the agent after reset and each step."""
84
-
85
- task_id: str
86
- difficulty: Literal["easy", "medium", "hard"]
87
- objective: str
88
- ticket: SupportTicket
89
- knowledge_base: list[KnowledgeSnippet]
90
- available_queues: list[str]
91
- available_priorities: list[str]
92
- available_statuses: list[str]
93
- available_issue_types: list[str]
94
- case: SupportCaseProgress
95
- current_sla_minutes_remaining: int | None = None
96
- workflow_stage: str
97
- required_next_actions: list[str] = Field(default_factory=list)
98
- risk_flags: list[str] = Field(default_factory=list)
99
- action_history: list[ActionHistoryEntry] = Field(default_factory=list)
100
- feedback: str = ""
101
- remaining_steps: int = 0
102
-
103
-
104
- class SupportDeskState(State):
105
- """Current environment state returned by the OpenEnv state() API."""
106
-
107
- episode_id: str | None = None
108
- task_id: str
109
- difficulty: Literal["easy", "medium", "hard"]
110
- step_count: int = 0
111
- reward: float = 0.0
112
- done: bool = False
113
- current_score: float = 0.0
114
- max_steps: int = 0
115
- case: SupportCaseProgress
116
- current_sla_minutes_remaining: int | None = None
117
- workflow_stage: str
118
- required_next_actions: list[str] = Field(default_factory=list)
119
- risk_flags: list[str] = Field(default_factory=list)
120
- action_history: list[ActionHistoryEntry] = Field(default_factory=list)
121
- completed_milestones: list[str] = Field(default_factory=list)
122
- last_feedback: str = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
supportdesk_env/openenv_compat.py DELETED
@@ -1,76 +0,0 @@
1
- """Compatibility helpers for environments where openenv-core is not installed."""
2
-
3
- from __future__ import annotations
4
-
5
- from dataclasses import dataclass, field
6
- from typing import Any, Generic, TypeVar
7
-
8
- from pydantic import BaseModel
9
-
10
- A = TypeVar("A")
11
- O = TypeVar("O")
12
- S = TypeVar("S")
13
-
14
-
15
- OPENENV_AVAILABLE = True
16
-
17
- try:
18
- from openenv.core.client_types import StepResult # type: ignore
19
- from openenv.core.env_client import EnvClient # type: ignore
20
- from openenv.core.env_server.interfaces import Environment # type: ignore
21
- from openenv.core.env_server.types import Action, Observation, State # type: ignore
22
- from openenv.core.env_server.types import EnvironmentMetadata # type: ignore
23
- except ImportError:
24
- try:
25
- from openenv_core.client_types import StepResult # type: ignore
26
- from openenv_core.http_env_client import HTTPEnvClient as EnvClient # type: ignore
27
- from openenv_core.env_server.interfaces import Environment # type: ignore
28
- from openenv_core.env_server.types import Action, Observation, State # type: ignore
29
- from openenv_core.env_server.types import EnvironmentMetadata # type: ignore
30
- except ImportError:
31
- OPENENV_AVAILABLE = False
32
-
33
- class Action(BaseModel):
34
- """Fallback Action base type for local import-only workflows."""
35
-
36
- class Observation(BaseModel):
37
- """Fallback Observation base type for local import-only workflows."""
38
-
39
- reward: float = 0.0
40
- done: bool = False
41
-
42
- class State(BaseModel):
43
- """Fallback State base type for local import-only workflows."""
44
-
45
- class Environment(Generic[A, O, S]):
46
- """Minimal base class used for local unit tests and import-based demos."""
47
-
48
- def __init__(self) -> None:
49
- super().__init__()
50
-
51
- class EnvironmentMetadata(BaseModel):
52
- """Fallback metadata model used when OpenEnv is absent."""
53
-
54
- name: str
55
- description: str
56
- readme_content: str | None = None
57
- version: str | None = None
58
- author: str | None = None
59
-
60
- @dataclass
61
- class StepResult(Generic[O]):
62
- """Fallback step result for local-only client compatibility."""
63
-
64
- observation: O
65
- reward: float
66
- done: bool
67
- info: dict[str, Any] = field(default_factory=dict)
68
-
69
- class EnvClient(Generic[A, O, S]):
70
- """Placeholder client that fails only when actually used."""
71
-
72
- def __init__(self, *args, **kwargs) -> None:
73
- raise ImportError(
74
- "SupportDeskEnv requires openenv-core to be installed. "
75
- "Run `py -3 -m pip install openenv-core` to use the HTTP client."
76
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
supportdesk_env/policies.py DELETED
@@ -1,84 +0,0 @@
1
- """Reusable policy helpers for local baselines and training examples."""
2
-
3
- from __future__ import annotations
4
-
5
- from supportdesk_env.models import SupportDeskAction, SupportDeskObservation
6
- from supportdesk_env.tasks import get_task
7
-
8
-
9
- def default_reply(task_id: str) -> str:
10
- """Return a task-specific high-signal customer reply."""
11
-
12
- if task_id == "billing_refund_easy":
13
- return (
14
- "Thanks for flagging the duplicate charge. I have started the refund for the extra "
15
- "charge, and the funds usually appear within 5-7 business days."
16
- )
17
- if task_id == "account_takeover_medium":
18
- return (
19
- "We have escalated this to our trust team. Please reset your password, scan your "
20
- "device for malware, and reply with your workspace_id, last successful login time, "
21
- "and billing email so we can verify the account safely."
22
- )
23
- if task_id == "regulated_export_exception_hard":
24
- return (
25
- "We cannot provide a bypass or temporary unlock yet. Our compliance team is running "
26
- "a compliance review, and we need your tenant_region, dpa_amendment_id, and "
27
- "legal_contact_email to continue that review."
28
- )
29
- return (
30
- "We are treating this as an active incident and our on-call engineering team is engaged. "
31
- "Please send the affected request IDs, UTC timestamps, and the impacted region so we can "
32
- "speed up the investigation."
33
- )
34
-
35
-
36
- def default_note(task_id: str) -> str:
37
- """Return a task-specific internal note."""
38
-
39
- if task_id == "billing_refund_easy":
40
- return "Duplicate charge confirmed from attached invoice; refund approved."
41
- if task_id == "account_takeover_medium":
42
- return "Suspicious login alert reported and customer is locked out."
43
- if task_id == "regulated_export_exception_hard":
44
- return (
45
- "Audit-driven export exception request tied to an EU residency policy block; "
46
- "customer asked for a manual bypass before legal approval."
47
- )
48
- return "EU data residency rollout hit intermittent HTTP 500s and the customer launches tonight."
49
-
50
-
51
- def heuristic_action(observation: SupportDeskObservation) -> SupportDeskAction:
52
- """Deterministic high-performing policy used by the baseline."""
53
-
54
- task = get_task(observation.task_id)
55
- case = observation.case
56
-
57
- if case.queue is None or case.priority is None or case.issue_type is None:
58
- return SupportDeskAction(
59
- operation="classify",
60
- queue=task.gold_queue,
61
- priority=task.gold_priority,
62
- issue_type=task.gold_issue_type,
63
- )
64
-
65
- if task.required_requested_fields and sorted(case.requested_fields) != sorted(task.required_requested_fields):
66
- return SupportDeskAction(
67
- operation="request_info",
68
- requested_fields=list(task.required_requested_fields),
69
- )
70
-
71
- if case.customer_follow_up.status == "pending":
72
- return SupportDeskAction(operation="wait")
73
-
74
- if not case.reply:
75
- return SupportDeskAction(operation="draft_reply", reply=default_reply(observation.task_id))
76
-
77
- if not case.internal_note:
78
- return SupportDeskAction(operation="add_internal_note", internal_note=default_note(observation.task_id))
79
-
80
- return SupportDeskAction(
81
- operation="submit",
82
- status=task.gold_status,
83
- resolution_code=task.gold_resolution_code,
84
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
supportdesk_env/server/__init__.py DELETED
@@ -1 +0,0 @@
1
- """SupportDesk server package."""
 
 
supportdesk_env/server/app.py DELETED
@@ -1,245 +0,0 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the BSD-style license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- """
8
- FastAPI application for the SupportDesk environment.
9
-
10
- This module creates an HTTP server that exposes the SupportDeskEnvironment
11
- over HTTP and WebSocket endpoints, compatible with EnvClient.
12
-
13
- Endpoints:
14
- - POST /reset: Reset the environment
15
- - POST /step: Execute an action
16
- - GET /state: Get current environment state
17
- - GET /schema: Get action/observation schemas
18
- - WS /ws: WebSocket endpoint for persistent sessions
19
- - GET /tasks: Get task catalog metadata
20
-
21
- Usage:
22
- # Development (with auto-reload):
23
- uvicorn supportdesk_env.server.app:app --reload --host 0.0.0.0 --port 8000
24
-
25
- # Production:
26
- uvicorn supportdesk_env.server.app:app --host 0.0.0.0 --port 8000 --workers 4
27
-
28
- # Or run directly:
29
- python -m supportdesk_env.server.app
30
- """
31
-
32
- from __future__ import annotations
33
-
34
- import os
35
- from typing import Any
36
-
37
- import uvicorn
38
- from fastapi import Body, HTTPException
39
- from fastapi.routing import APIRoute
40
-
41
- try:
42
- from openenv.core.env_server import http_server as openenv_http_server
43
- except ImportError:
44
- try:
45
- from openenv_core.env_server import http_server as openenv_http_server
46
- except Exception as e: # pragma: no cover
47
- raise ImportError(
48
- "openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
49
- ) from e
50
-
51
- try:
52
- from ..models import SupportDeskAction, SupportDeskObservation, SupportDeskState
53
- from ..tasks import TASKS
54
- from .supportdesk_environment import SupportDeskEnvironment
55
- except ModuleNotFoundError:
56
- from supportdesk_env.models import SupportDeskAction, SupportDeskObservation, SupportDeskState
57
- from supportdesk_env.tasks import TASKS
58
- from supportdesk_env.server.supportdesk_environment import SupportDeskEnvironment
59
-
60
- # Bind the default OpenEnv /state route to the full typed state model.
61
- openenv_http_server.State = SupportDeskState
62
- create_app = openenv_http_server.create_app
63
-
64
- # Create the app with web interface and README integration.
65
- app = create_app(
66
- SupportDeskEnvironment,
67
- SupportDeskAction,
68
- SupportDeskObservation,
69
- env_name="supportdesk_env",
70
- max_concurrent_envs=1, # increase this number to allow more concurrent WebSocket sessions
71
- )
72
-
73
-
74
- TASK_GRADER_PATHS = {
75
- "billing_refund_easy": "graders:BillingRefundEasyGrader",
76
- "account_takeover_medium": "graders:AccountTakeoverMediumGrader",
77
- "api_incident_hard": "graders:ApiIncidentHardGrader",
78
- "regulated_export_exception_hard": "graders:RegulatedExportExceptionHardGrader",
79
- }
80
-
81
-
82
- def _replace_route(path: str, methods: set[str]) -> None:
83
- """Remove a generated route so we can register a score-aware replacement."""
84
-
85
- app.router.routes = [
86
- route
87
- for route in app.router.routes
88
- if not (
89
- isinstance(route, APIRoute)
90
- and route.path == path
91
- and methods.issubset(set(route.methods or set()))
92
- )
93
- ]
94
-
95
-
96
- def _score_response(env: SupportDeskEnvironment, observation: SupportDeskObservation) -> dict[str, Any]:
97
- """Return the standard OpenEnv shape plus an explicit top-level score."""
98
-
99
- return {
100
- "observation": observation.model_dump(),
101
- "reward": observation.reward,
102
- "done": observation.done,
103
- "score": env.state.current_score,
104
- }
105
-
106
-
107
- _replace_route("/reset", {"POST"})
108
- _replace_route("/step", {"POST"})
109
-
110
-
111
- @app.post("/reset")
112
- async def reset_with_score(
113
- request: openenv_http_server.ResetRequest = Body(default_factory=openenv_http_server.ResetRequest),
114
- ) -> dict[str, Any]:
115
- """Reset the environment and expose the initial deterministic score at top level."""
116
-
117
- env = SupportDeskEnvironment()
118
- try:
119
- kwargs = request.model_dump(exclude_unset=True)
120
- observation = env.reset(**kwargs)
121
- return _score_response(env, observation)
122
- finally:
123
- env.close()
124
-
125
-
126
- @app.post("/step")
127
- async def step_with_score(request: openenv_http_server.StepRequest) -> dict[str, Any]:
128
- """Execute a step and expose the current deterministic score at top level."""
129
-
130
- action_data = request.action
131
- try:
132
- action = openenv_http_server.deserialize_action(action_data, SupportDeskAction)
133
- except openenv_http_server.ValidationError as exc:
134
- raise HTTPException(status_code=422, detail=exc.errors()) from exc
135
-
136
- env = SupportDeskEnvironment()
137
- try:
138
- kwargs = request.model_dump(exclude_unset=True, exclude={"action"})
139
- observation = env.step(action, **kwargs)
140
- return _score_response(env, observation)
141
- finally:
142
- env.close()
143
-
144
-
145
- @app.get("/tasks")
146
- def list_tasks() -> dict[str, Any]:
147
- """Expose a stable task catalog for UI, debugging, and pre-submit checks."""
148
-
149
- return {
150
- "environment": {
151
- "name": "supportdesk_env",
152
- "version": "0.1.0",
153
- "grader_type": "deterministic",
154
- "score_range": [0.0, 1.0],
155
- },
156
- "total_tasks": len(TASKS),
157
- "tasks": [
158
- {
159
- "task_id": task.task_id,
160
- "grader": TASK_GRADER_PATHS[task.task_id],
161
- "title": task.title,
162
- "difficulty": task.difficulty,
163
- "objective": task.objective,
164
- "max_steps": task.max_steps,
165
- "gold_issue_type": task.gold_issue_type,
166
- "gold_queue": task.gold_queue,
167
- "gold_priority": task.gold_priority,
168
- "ticket_context": {
169
- "customer_tier": task.ticket.customer_tier,
170
- "region": task.ticket.region,
171
- "affected_users": task.ticket.affected_users,
172
- "sla_minutes_remaining": task.ticket.sla_minutes_remaining,
173
- },
174
- }
175
- for task in TASKS.values()
176
- ],
177
- }
178
-
179
-
180
- @app.get("/episodes/{episode_id}/state", response_model=SupportDeskState)
181
- def get_episode_state(episode_id: str) -> SupportDeskState:
182
- """Optional explicit state helper for robust episode-addressable inspection."""
183
-
184
- try:
185
- return SupportDeskEnvironment.state_for_episode(episode_id)
186
- except ValueError as exc:
187
- raise HTTPException(status_code=404, detail=str(exc)) from exc
188
-
189
-
190
- @app.post("/episodes/{episode_id}/step")
191
- def step_episode(
192
- episode_id: str,
193
- payload: dict[str, Any] = Body(...),
194
- ) -> dict[str, Any]:
195
- """Optional explicit step helper that does not require sticky request context."""
196
-
197
- action_payload = payload.get("action")
198
- if not isinstance(action_payload, dict):
199
- raise HTTPException(status_code=422, detail="Request body must include an 'action' object.")
200
-
201
- timeout_s = payload.get("timeout_s")
202
- try:
203
- action = SupportDeskAction.model_validate(action_payload)
204
- env = SupportDeskEnvironment()
205
- observation = env.step(action, timeout_s=timeout_s, episode_id=episode_id)
206
- except ValueError as exc:
207
- raise HTTPException(status_code=404, detail=str(exc)) from exc
208
-
209
- return {
210
- "observation": observation.model_dump(),
211
- "reward": observation.reward,
212
- "done": observation.done,
213
- "score": SupportDeskEnvironment.state_for_episode(episode_id).current_score,
214
- }
215
-
216
-
217
- def main(host: str = "0.0.0.0", port: int = 8000) -> None:
218
- """
219
- Entry point for direct execution via uv run or python -m.
220
-
221
- This function enables running the server without Docker:
222
- uv run --project . server
223
- uv run --project . server --port 8001
224
- python -m supportdesk_env.server.app
225
-
226
- Args:
227
- host: Host address to bind to (default: "0.0.0.0")
228
- port: Port number to listen on (default: 8000)
229
-
230
- For production deployments, consider using uvicorn directly with
231
- multiple workers:
232
- uvicorn supportdesk_env.server.app:app --workers 4
233
- """
234
-
235
- uvicorn.run("supportdesk_env.server.app:app", host=host, port=port)
236
-
237
-
238
- if __name__ == "__main__":
239
- import argparse
240
-
241
- parser = argparse.ArgumentParser()
242
- parser.add_argument("--host", default=os.getenv("HOST", "0.0.0.0"))
243
- parser.add_argument("--port", type=int, default=int(os.getenv("PORT", "8000")))
244
- args = parser.parse_args()
245
- main(host=args.host, port=args.port)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
supportdesk_env/server/supportdesk_environment.py DELETED
@@ -1,545 +0,0 @@
1
- """SupportDesk environment implementation."""
2
-
3
- from __future__ import annotations
4
-
5
- import os
6
- import threading
7
- import uuid
8
- from pathlib import Path
9
- from typing import ClassVar
10
-
11
- from supportdesk_env.graders import grade_case
12
- from supportdesk_env.models import (
13
- ActionHistoryEntry,
14
- CustomerFollowUp,
15
- SupportCaseProgress,
16
- SupportDeskAction,
17
- SupportDeskObservation,
18
- SupportDeskState,
19
- )
20
- from supportdesk_env.openenv_compat import Environment, EnvironmentMetadata
21
- from supportdesk_env.tasks import (
22
- ALL_ISSUE_TYPES,
23
- ALL_PRIORITIES,
24
- ALL_QUEUES,
25
- ALL_STATUSES,
26
- SupportTaskSpec,
27
- get_task,
28
- list_task_ids,
29
- )
30
-
31
-
32
- class SupportDeskEnvironment(
33
- Environment[SupportDeskAction, SupportDeskObservation, SupportDeskState]
34
- ):
35
- """A realistic customer support triage environment with dense rewards."""
36
-
37
- _state_lock: ClassVar[threading.RLock] = threading.RLock()
38
- _episode_store: ClassVar[dict[str, SupportDeskState]] = {}
39
- _episode_task_ids: ClassVar[dict[str, str]] = {}
40
- _latest_episode_id: ClassVar[str | None] = None
41
- _shared_reset_counter: ClassVar[int] = 0
42
-
43
def __init__(self, task_id: str | None = None):
    """Bind the environment to a task and start from an empty case.

    Args:
        task_id: Task to load. When omitted, the ``SUPPORTDESK_TASK_ID``
            environment variable is consulted, then the first registered task.
    """
    super().__init__()
    task_from_env = os.getenv("SUPPORTDESK_TASK_ID")
    # reset() only rotates through tasks when neither source pinned one.
    self._explicit_task_id = not (task_id is None and task_from_env is None)
    chosen_task_id = task_id or task_from_env or list_task_ids()[0]
    self.task: SupportTaskSpec = get_task(chosen_task_id)
    self._max_steps = self.task.max_steps
    self._current_sla_minutes_remaining = self.task.ticket.sla_minutes_remaining

    # Per-episode bookkeeping starts blank.
    self._episode_id: str | None = None
    self._step_count = 0
    self._reward_total = 0.0
    self._done = False
    self._last_feedback = ""
    self._history: list[ActionHistoryEntry] = []
    self._case = SupportCaseProgress()

    # Grade the empty case once so score and milestones have a baseline.
    baseline = grade_case(self.task, self._case)
    self._score = baseline.total_score
    self._completed_milestones = list(baseline.completed_milestones)
61
-
62
@classmethod
def _build_initial_state(cls, task: SupportTaskSpec, episode_id: str) -> SupportDeskState:
    """Create the step-zero state snapshot for ``task`` under ``episode_id``.

    The empty case is graded once so the snapshot carries the baseline score
    and whatever milestones the grader reports for an untouched case.
    """
    blank_case = SupportCaseProgress()
    baseline = grade_case(task, blank_case)
    snapshot_fields = dict(
        episode_id=episode_id,
        task_id=task.task_id,
        difficulty=task.difficulty,
        step_count=0,
        reward=0.0,
        done=False,
        current_score=baseline.total_score,
        max_steps=task.max_steps,
        case=blank_case,
        current_sla_minutes_remaining=task.ticket.sla_minutes_remaining,
        workflow_stage="intake",
        required_next_actions=["classify"],
        risk_flags=[],
        action_history=[],
        completed_milestones=list(baseline.completed_milestones),
        last_feedback="New case loaded. Review the ticket and policy snippets before acting.",
    )
    return SupportDeskState(**snapshot_fields)
84
-
85
- @classmethod
86
- def _extract_episode_id(cls, episode_id: str | None = None, **kwargs) -> str | None:
87
- if episode_id:
88
- return episode_id
89
- for key in ("episode_id", "request_id"):
90
- value = kwargs.get(key)
91
- if isinstance(value, str) and value:
92
- return value
93
- return None
94
-
95
- def _load_episode(self, episode_id: str | None = None, **kwargs) -> None:
96
- resolved_episode_id = self._extract_episode_id(episode_id, **kwargs) or self.__class__._latest_episode_id
97
- if not resolved_episode_id:
98
- return
99
-
100
- episode_state = self.__class__._episode_store.get(resolved_episode_id)
101
- if episode_state is None:
102
- raise ValueError(
103
- f"Unknown episode_id '{resolved_episode_id}'. Call reset() first or provide a valid episode_id."
104
- )
105
-
106
- task = get_task(self.__class__._episode_task_ids.get(resolved_episode_id, episode_state.task_id))
107
- self.task = task
108
- self._max_steps = episode_state.max_steps
109
- self._step_count = episode_state.step_count
110
- self._reward_total = episode_state.reward
111
- self._done = episode_state.done
112
- self._last_feedback = episode_state.last_feedback
113
- self._history = [entry.model_copy(deep=True) for entry in episode_state.action_history]
114
- self._case = episode_state.case.model_copy(deep=True)
115
- self._episode_id = resolved_episode_id
116
- self._score = episode_state.current_score
117
- self._completed_milestones = list(episode_state.completed_milestones)
118
- self._current_sla_minutes_remaining = episode_state.current_sla_minutes_remaining
119
-
120
- def _persist_episode(self) -> None:
121
- if self._episode_id is None:
122
- return
123
- self.__class__._episode_store[self._episode_id] = SupportDeskState(
124
- episode_id=self._episode_id,
125
- task_id=self.task.task_id,
126
- difficulty=self.task.difficulty,
127
- step_count=self._step_count,
128
- reward=round(self._reward_total, 4),
129
- done=self._done,
130
- current_score=round(self._score, 4),
131
- max_steps=self._max_steps,
132
- case=self._case.model_copy(deep=True),
133
- current_sla_minutes_remaining=self._current_sla_minutes_remaining,
134
- workflow_stage=self._workflow_stage(),
135
- required_next_actions=self._required_next_actions(),
136
- risk_flags=self._risk_flags(),
137
- action_history=[entry.model_copy(deep=True) for entry in self._history],
138
- completed_milestones=list(self._completed_milestones),
139
- last_feedback=self._last_feedback,
140
- )
141
- self.__class__._episode_task_ids[self._episode_id] = self.task.task_id
142
- self.__class__._latest_episode_id = self._episode_id
143
-
144
@property
def state(self) -> SupportDeskState:
    """Return a snapshot of the episode this instance currently resolves to.

    Under the class lock the instance is first refreshed via
    ``_load_episode()`` (called without an id), then a new state object is
    built from the instance fields; case and history are deep-copied.
    """
    with self.__class__._state_lock:
        self._load_episode()
        snapshot = SupportDeskState(
            episode_id=self._episode_id,
            task_id=self.task.task_id,
            difficulty=self.task.difficulty,
            step_count=self._step_count,
            reward=round(self._reward_total, 4),
            done=self._done,
            current_score=round(self._score, 4),
            max_steps=self._max_steps,
            case=self._case.model_copy(deep=True),
            current_sla_minutes_remaining=self._current_sla_minutes_remaining,
            workflow_stage=self._workflow_stage(),
            required_next_actions=self._required_next_actions(),
            risk_flags=self._risk_flags(),
            action_history=[entry.model_copy(deep=True) for entry in self._history],
            completed_milestones=list(self._completed_milestones),
            last_feedback=self._last_feedback,
        )
    return snapshot
166
-
167
def reset(
    self,
    seed: int | None = None,
    episode_id: str | None = None,
    **kwargs,
) -> SupportDeskObservation:
    """Start a new episode and return its first observation.

    Args:
        seed: Accepted for interface compatibility; not used here.
        episode_id: Id to register the episode under. When omitted an id of
            the form ``"<task_id>-<8 hex chars>"`` is generated.
        **kwargs: Accepted and ignored.

    Returns:
        The initial observation, with ``reward=0.0`` and ``done=False``.
    """
    env_cls = self.__class__
    with env_cls._state_lock:
        if not self._explicit_task_id:
            # No task was pinned: rotate through the registry using a
            # counter shared at class level.
            available = list_task_ids()
            rotated_id = available[env_cls._shared_reset_counter % len(available)]
            env_cls._shared_reset_counter += 1
            self.task = get_task(rotated_id)
            self._max_steps = self.task.max_steps

        new_id = episode_id or f"{self.task.task_id}-{uuid.uuid4().hex[:8]}"
        self._episode_id = new_id
        env_cls._episode_store[new_id] = env_cls._build_initial_state(self.task, new_id)
        env_cls._episode_task_ids[new_id] = self.task.task_id
        env_cls._latest_episode_id = new_id

        # Re-hydrate instance fields from the snapshot just stored.
        self._load_episode(new_id)
        return self._build_observation(reward=0.0, done=False)
187
-
188
def step(
    self,
    action: SupportDeskAction,
    timeout_s: float | None = None,
    episode_id: str | None = None,
    **kwargs,
) -> SupportDeskObservation:
    """Apply one agent action to an episode and return the resulting observation.

    Args:
        action: The operation to perform together with its payload fields.
        timeout_s: Accepted for interface compatibility; not used in this method.
        episode_id: Episode to act on; resolution of a missing id is delegated
            to ``_load_episode``.
        **kwargs: Forwarded to ``_load_episode``.

    Returns:
        Observation carrying this step's reward and the done flag.

    Raises:
        ValueError: Propagated from ``_load_episode`` when the resolved episode
            id is not in the episode store.
    """
    with self.__class__._state_lock:
        self._load_episode(episode_id, **kwargs)

        if self._done:
            # Finished episodes are not mutated or persisted; the caller only
            # receives a fixed negative reward in the returned observation.
            return self._build_observation(
                reward=-0.05,
                done=True,
                feedback="Episode already finished. Call reset() before taking more actions.",
            )

        # Capture grade and stage BEFORE the action so deltas can be computed.
        previous_grade = grade_case(self.task, self._case)
        previous_stage = self._workflow_stage()
        self._apply_action(action)
        self._step_count += 1
        # Follow-up delivery and SLA decay happen before re-grading, so the
        # bonus/penalty helpers below see the post-event case.
        self._advance_external_events(action)
        self._degrade_sla()

        current_grade = grade_case(self.task, self._case)
        # Reward = score delta + process bonus + (non-positive) action penalty.
        reward = current_grade.total_score - previous_grade.total_score
        reward += self._process_bonus(action, previous_stage, current_grade.total_score)
        reward += self._action_penalty(
            action,
            current_grade.total_score,
            previous_grade.total_score,
        )
        reward = round(reward, 4)

        self._score = current_grade.total_score
        self._completed_milestones = list(current_grade.completed_milestones)

        # The episode ends on an explicit submit or when the step budget runs out.
        if action.operation == "submit":
            self._done = True
            self._last_feedback = (
                "Case submitted. Final deterministic grade is "
                f"{current_grade.total_score:.2f}."
            )
        elif self._step_count >= self._max_steps:
            self._done = True
            self._last_feedback = (
                f"Reached max steps ({self._max_steps}). Final deterministic grade is "
                f"{current_grade.total_score:.2f}."
            )
        else:
            self._last_feedback = self._build_feedback(current_grade, reward)

        self._reward_total = round(self._reward_total + reward, 4)
        self._history.append(
            ActionHistoryEntry(
                step=self._step_count,
                operation=action.operation,
                summary=self._summarize_action(action),
                reward_delta=reward,
            )
        )
        # Write the updated episode back to the shared class-level store.
        self._persist_episode()

        return self._build_observation(reward=reward, done=self._done)
252
-
253
@classmethod
def state_for_episode(cls, episode_id: str) -> SupportDeskState:
    """Return a deep copy of the stored state for ``episode_id``.

    Raises:
        ValueError: If no episode is stored under that id.
    """
    with cls._state_lock:
        stored = cls._episode_store.get(episode_id)
        if stored is not None:
            # Hand out a copy so callers cannot mutate the shared store.
            return stored.model_copy(deep=True)
        raise ValueError(f"Unknown episode_id '{episode_id}'. Call reset() first.")
260
-
261
def close(self) -> None:
    """Do nothing; exists so local scripts can call ``close()`` uniformly."""
    return None
263
-
264
def get_metadata(self) -> EnvironmentMetadata:
    """Return descriptive metadata for docs, validators, and the HF Space UI.

    The README is read from ``README.md`` under ``parents[2]`` of this file's
    resolved path. It is optional: when it cannot be opened the metadata is
    returned with ``readme_content=None``.
    """
    readme_path = Path(__file__).resolve().parents[2] / "README.md"
    try:
        # Read directly instead of checking exists() first: this also covers
        # a path that is a directory, unreadable, or removed between the
        # check and the read.
        readme_content = readme_path.read_text(encoding="utf-8")
    except OSError:
        readme_content = None

    return EnvironmentMetadata(
        name="supportdesk_env",
        description=(
            "A policy-heavy enterprise operations desk with deterministic grading, delayed "
            "customer follow-ups, SLA pressure, escalation tradeoffs, and sharper cross-functional triage."
        ),
        readme_content=readme_content,
        version="0.1.0",
        author="HyperBrick",
    )
279
-
280
- def _apply_action(self, action: SupportDeskAction) -> None:
281
- if action.operation == "classify":
282
- if action.queue is not None:
283
- self._case.queue = action.queue
284
- if action.priority is not None:
285
- self._case.priority = action.priority
286
- if action.issue_type is not None:
287
- self._case.issue_type = action.issue_type
288
- return
289
-
290
- if action.operation == "request_info":
291
- if action.requested_fields:
292
- merged = {item for item in self._case.requested_fields}
293
- merged.update(action.requested_fields)
294
- self._case.requested_fields = sorted(merged)
295
- if self.task.follow_up_outcome != "none" and self._case.customer_follow_up.status == "none":
296
- self._case.customer_follow_up = CustomerFollowUp(status="pending")
297
- return
298
-
299
- if action.operation == "draft_reply":
300
- if action.reply is not None:
301
- self._case.reply = action.reply
302
- return
303
-
304
- if action.operation == "add_internal_note":
305
- if action.internal_note is not None:
306
- self._case.internal_note = action.internal_note
307
- return
308
-
309
- if action.operation == "submit":
310
- if action.status is not None:
311
- self._case.status = action.status
312
- if action.resolution_code is not None:
313
- self._case.resolution_code = action.resolution_code
314
-
315
- def _advance_external_events(self, action: SupportDeskAction) -> None:
316
- if self._case.customer_follow_up.status == "pending" and action.operation == "wait":
317
- self._case.customer_follow_up = CustomerFollowUp(
318
- status=self.task.follow_up_outcome,
319
- message=self.task.follow_up_message or None,
320
- provided_fields=list(self.task.follow_up_provided_fields),
321
- wrong_fields=list(self.task.follow_up_wrong_fields),
322
- )
323
-
324
- def _degrade_sla(self) -> None:
325
- if self._current_sla_minutes_remaining is None:
326
- return
327
- self._current_sla_minutes_remaining = max(
328
- 0,
329
- self._current_sla_minutes_remaining - self.task.sla_step_cost,
330
- )
331
-
332
- def _action_penalty(
333
- self,
334
- action: SupportDeskAction,
335
- current_score: float,
336
- previous_score: float,
337
- ) -> float:
338
- penalty = 0.0
339
- if current_score <= previous_score:
340
- penalty -= 0.03
341
- penalty -= self._mixed_action_penalty(action)
342
- penalty -= self._escalation_tradeoff_penalty()
343
- if action.operation == "draft_reply" and not action.reply:
344
- penalty -= 0.03
345
- if action.operation == "request_info" and not action.requested_fields:
346
- penalty -= 0.03
347
- if action.operation == "add_internal_note" and not action.internal_note:
348
- penalty -= 0.03
349
- if action.operation == "classify" and not any(
350
- [action.queue, action.priority, action.issue_type, action.status, action.resolution_code]
351
- ):
352
- penalty -= 0.03
353
- if action.operation == "wait" and self._case.customer_follow_up.status != "pending":
354
- penalty -= 0.02
355
- if action.operation == "submit" and self._required_next_actions():
356
- penalty -= 0.08
357
- if (
358
- self.task.under_escalation_deadline_step is not None
359
- and self._step_count >= self.task.under_escalation_deadline_step
360
- and (self._case.queue != self.task.gold_queue or self._case.priority != self.task.gold_priority)
361
- ):
362
- penalty -= 0.04
363
- if self._current_sla_minutes_remaining is not None and self._current_sla_minutes_remaining <= 15:
364
- penalty -= 0.02
365
- return round(penalty, 4)
366
-
367
- def _build_feedback(self, grade, reward: float) -> str:
368
- return (
369
- f"Reward delta {reward:+.2f}. Current score {grade.total_score:.2f}. "
370
- f"SLA remaining: {self._current_sla_minutes_remaining if self._current_sla_minutes_remaining is not None else 'n/a'} minutes. "
371
- f"Stage: {self._workflow_stage()}. "
372
- f"Customer follow-up: {self._case.customer_follow_up.status}. "
373
- f"Next actions: {', '.join(self._required_next_actions()) or 'none'}. "
374
- f"Completed milestones: {', '.join(grade.completed_milestones) or 'none yet'}."
375
- )
376
-
377
- def _summarize_action(self, action: SupportDeskAction) -> str:
378
- parts = [action.operation]
379
- if action.queue:
380
- parts.append(f"queue={action.queue}")
381
- if action.priority:
382
- parts.append(f"priority={action.priority}")
383
- if action.issue_type:
384
- parts.append(f"issue_type={action.issue_type}")
385
- if action.status:
386
- parts.append(f"status={action.status}")
387
- if action.resolution_code:
388
- parts.append(f"resolution={action.resolution_code}")
389
- if action.requested_fields:
390
- parts.append(f"requested={','.join(action.requested_fields)}")
391
- if action.reply:
392
- parts.append("reply=yes")
393
- if action.internal_note:
394
- parts.append("note=yes")
395
- return " | ".join(parts)
396
-
397
def _build_observation(
    self,
    reward: float,
    done: bool,
    feedback: str | None = None,
) -> SupportDeskObservation:
    """Assemble the observation returned to the agent.

    Args:
        reward: Reward to report for the step.
        done: Whether the episode has ended.
        feedback: Message to show; when empty or ``None`` the last stored
            feedback is used instead.
    """
    task = self.task
    message = feedback or self._last_feedback
    steps_left = max(self._max_steps - self._step_count, 0)
    return SupportDeskObservation(
        task_id=task.task_id,
        difficulty=task.difficulty,
        objective=task.objective,
        ticket=task.ticket,
        knowledge_base=list(task.knowledge_base),
        available_queues=list(ALL_QUEUES),
        available_priorities=list(ALL_PRIORITIES),
        available_statuses=list(ALL_STATUSES),
        available_issue_types=list(ALL_ISSUE_TYPES),
        case=self._case.model_copy(deep=True),
        current_sla_minutes_remaining=self._current_sla_minutes_remaining,
        workflow_stage=self._workflow_stage(),
        required_next_actions=self._required_next_actions(),
        risk_flags=self._risk_flags(),
        action_history=[entry.model_copy(deep=True) for entry in self._history],
        feedback=message,
        remaining_steps=steps_left,
        reward=reward,
        done=done,
    )
424
-
425
- def _workflow_stage(self) -> str:
426
- if self._done:
427
- return "closed"
428
- if self._case.queue is None or self._case.priority is None or self._case.issue_type is None:
429
- return "intake"
430
- if self.task.required_requested_fields and sorted(self._case.requested_fields) != sorted(self.task.required_requested_fields):
431
- return "verification"
432
- if self._case.customer_follow_up.status == "pending":
433
- return "awaiting_customer"
434
- if self._case.customer_follow_up.status in {"partial", "incorrect"}:
435
- return "follow_up_review"
436
- if not self._case.reply:
437
- return "customer_communication"
438
- if not self._case.internal_note:
439
- return "internal_handoff"
440
- if self._case.status != self.task.gold_status or self._case.resolution_code != self.task.gold_resolution_code:
441
- return "final_resolution"
442
- return "ready_to_submit"
443
-
444
- def _required_next_actions(self) -> list[str]:
445
- if self._case.queue is None or self._case.priority is None or self._case.issue_type is None:
446
- return ["classify"]
447
- if self.task.required_requested_fields and sorted(self._case.requested_fields) != sorted(self.task.required_requested_fields):
448
- return ["request_info"]
449
- if self._case.customer_follow_up.status == "pending":
450
- return ["wait"]
451
- needed: list[str] = []
452
- if not self._case.reply:
453
- needed.append("draft_reply")
454
- if not self._case.internal_note:
455
- needed.append("add_internal_note")
456
- if self._case.status != self.task.gold_status or self._case.resolution_code != self.task.gold_resolution_code:
457
- needed.append("submit")
458
- return needed
459
-
460
- def _risk_flags(self) -> list[str]:
461
- flags = list(self.task.risk_flags)
462
- if self._current_sla_minutes_remaining is not None and self._current_sla_minutes_remaining <= 30:
463
- flags.append("sla_breach_risk")
464
- if self.task.ticket.affected_users and self.task.ticket.affected_users >= 1000:
465
- flags.append("high_customer_impact")
466
- if self.task.ticket.secondary_concerns:
467
- flags.append("secondary_issue_present")
468
- if self._case.customer_follow_up.status == "partial":
469
- flags.append("customer_reply_incomplete")
470
- if self._case.customer_follow_up.status == "incorrect":
471
- flags.append("customer_reply_irrelevant")
472
- return sorted(set(flags))
473
-
474
- def _process_bonus(
475
- self,
476
- action: SupportDeskAction,
477
- previous_stage: str,
478
- current_score: float,
479
- ) -> float:
480
- bonus = 0.0
481
- stage_rank = {
482
- "intake": 0,
483
- "verification": 1,
484
- "awaiting_customer": 2,
485
- "follow_up_review": 3,
486
- "customer_communication": 4,
487
- "internal_handoff": 5,
488
- "final_resolution": 6,
489
- "ready_to_submit": 7,
490
- "closed": 8,
491
- }
492
- current_stage = self._workflow_stage()
493
- if stage_rank.get(current_stage, 0) > stage_rank.get(previous_stage, 0):
494
- bonus += 0.02
495
- if action.operation == "classify" and self._step_count == 1:
496
- if self._case.queue == self.task.gold_queue and self._case.priority == self.task.gold_priority:
497
- bonus += 0.03
498
- if action.operation == "request_info" and current_score > 0 and self.task.required_requested_fields:
499
- bonus += 0.02
500
- if action.operation == "wait" and self._case.customer_follow_up.status in {"partial", "complete", "incorrect"}:
501
- bonus += 0.02
502
- if action.operation == "submit" and not self._required_next_actions():
503
- bonus += 0.03
504
- if self._current_sla_minutes_remaining is not None and self._current_sla_minutes_remaining > 0:
505
- if self.task.gold_priority == "urgent" and self._step_count <= 2 and self._case.queue == self.task.gold_queue:
506
- bonus += 0.02
507
- return round(bonus, 4)
508
-
509
- def _mixed_action_penalty(self, action: SupportDeskAction) -> float:
510
- allowed_fields = {
511
- "classify": {"queue", "priority", "issue_type"},
512
- "request_info": {"requested_fields"},
513
- "draft_reply": {"reply"},
514
- "add_internal_note": {"internal_note"},
515
- "submit": {"status", "resolution_code"},
516
- "wait": set(),
517
- }
518
- populated_fields = {
519
- "queue": action.queue,
520
- "priority": action.priority,
521
- "issue_type": action.issue_type,
522
- "status": action.status,
523
- "resolution_code": action.resolution_code,
524
- "requested_fields": action.requested_fields,
525
- "reply": action.reply,
526
- "internal_note": action.internal_note,
527
- }
528
- extras = 0
529
- for field_name, value in populated_fields.items():
530
- if field_name in allowed_fields[action.operation]:
531
- continue
532
- if value is None:
533
- continue
534
- if isinstance(value, list) and not value:
535
- continue
536
- if isinstance(value, str) and not value:
537
- continue
538
- extras += 1
539
- return min(0.06, extras * 0.02)
540
-
541
- def _escalation_tradeoff_penalty(self) -> float:
542
- penalty = 0.0
543
- if self._case.queue in self.task.over_escalation_queues and self._case.queue != self.task.gold_queue:
544
- penalty += 0.06
545
- return round(penalty, 4)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
supportdesk_env/tasks.py DELETED
@@ -1,405 +0,0 @@
1
- """Task registry for the SupportDesk environment."""
2
-
3
- from __future__ import annotations
4
-
5
- from dataclasses import dataclass
6
- from typing import Literal
7
-
8
- from supportdesk_env.models import KnowledgeSnippet, SupportTicket
9
-
10
-
11
# Closed vocabularies offered to the agent for each case field.

# Routing queues.
ALL_QUEUES = [
    "billing_ops", "trust_and_safety", "platform_engineering",
    "compliance_ops", "general_support",
]

# Priorities, listed from lowest to highest.
ALL_PRIORITIES = [
    "low",
    "normal",
    "high",
    "urgent",
]

# Case statuses.
ALL_STATUSES = [
    "new",
    "waiting_on_customer",
    "resolved",
    "escalated",
]

# Issue categories.
ALL_ISSUE_TYPES = [
    "duplicate_charge", "account_compromise", "production_incident",
    "regulated_exception", "general_question",
]
27
-
28
-
29
@dataclass(frozen=True)
class SupportTaskSpec:
    """Immutable definition of a single support triage task.

    Bundles what the agent is shown (ticket, knowledge base), the expected
    ("gold") outcome, and the per-task knobs for follow-ups, SLA decay,
    escalation penalties and the step budget.
    """

    # Identity and presentation.
    task_id: str
    difficulty: Literal["easy", "medium", "hard"]
    title: str
    objective: str
    ticket: SupportTicket
    knowledge_base: tuple[KnowledgeSnippet, ...]

    # Expected final classification and resolution.
    gold_queue: str
    gold_priority: str
    gold_issue_type: str
    gold_status: str
    gold_resolution_code: str

    # Fields the agent must request from the customer; empty when none are needed.
    required_requested_fields: tuple[str, ...]
    # Groups of marker phrases for the reply and the internal note.
    # NOTE(review): each inner tuple looks like a set of alternative phrasings
    # for one requirement - confirm against the grader.
    required_reply_markers: tuple[tuple[str, ...], ...]
    required_note_markers: tuple[tuple[str, ...], ...]
    # Phrases that presumably must not appear in the reply - confirm against the grader.
    forbidden_reply_markers: tuple[str, ...] = ()
    # Static risk flags attached to the task.
    risk_flags: tuple[str, ...] = ()

    # Scripted customer follow-up; "none" means the task has no follow-up.
    follow_up_outcome: Literal["none", "partial", "complete", "incorrect"] = "none"
    follow_up_message: str = ""
    follow_up_provided_fields: tuple[str, ...] = ()
    follow_up_wrong_fields: tuple[str, ...] = ()

    # Amount taken off the remaining SLA per step; presumably minutes, as in the ticket's sla_minutes_remaining.
    sla_step_cost: int = 15
    # Queues that count as over-escalation for this task.
    over_escalation_queues: tuple[str, ...] = ()
    # Step number tied to under-escalation; None disables it.
    under_escalation_deadline_step: int | None = None
    # Maximum number of steps per episode.
    max_steps: int = 6
57
-
58
-
59
- TASKS: dict[str, SupportTaskSpec] = {
60
- "billing_refund_easy": SupportTaskSpec(
61
- task_id="billing_refund_easy",
62
- difficulty="easy",
63
- title="Duplicate charge refund triage",
64
- objective=(
65
- "Triage a duplicate-charge billing ticket, send the correct customer response, "
66
- "and close the case only if no further customer information is required."
67
- ),
68
- ticket=SupportTicket(
69
- customer_name="Riya Shah",
70
- customer_tier="pro",
71
- company="PixelNorth Studio",
72
- subject="Charged twice after I canceled",
73
- body=(
74
- "I canceled our Pro annual workspace yesterday, but my card was charged again "
75
- "this morning and I still see the old invoice. We only had one workspace, "
76
- "so this looks like a duplicate charge. Please fix it quickly."
77
- ),
78
- region="ap-south-1",
79
- affected_users=12,
80
- sla_minutes_remaining=240,
81
- business_impact="Finance ops are blocked from closing the monthly books until the duplicate invoice is fixed.",
82
- secondary_concerns=["The customer also wants confirmation that the canceled workspace will stay deactivated."],
83
- attachments=["invoice_7741.pdf"],
84
- ),
85
- knowledge_base=(
86
- KnowledgeSnippet(
87
- article_id="KB-101",
88
- title="Duplicate charges and same-day cancellations",
89
- content=(
90
- "If a customer reports a duplicate charge and the subscription is already "
91
- "canceled, route the ticket to billing_ops with high priority. Billing can "
92
- "approve the refund immediately without requesting extra information when an "
93
- "invoice is attached."
94
- ),
95
- ),
96
- KnowledgeSnippet(
97
- article_id="KB-102",
98
- title="Refund communication checklist",
99
- content=(
100
- "Customer replies for approved duplicate-charge refunds must confirm that a "
101
- "refund is being processed, mention the duplicate charge, and set the "
102
- "expectation that funds typically appear within 5-7 business days."
103
- ),
104
- ),
105
- KnowledgeSnippet(
106
- article_id="KB-103",
107
- title="When to close a billing case",
108
- content=(
109
- "Close the case as resolved only after the refund path is clear and no more "
110
- "customer details are needed."
111
- ),
112
- ),
113
- ),
114
- gold_queue="billing_ops",
115
- gold_priority="high",
116
- gold_issue_type="duplicate_charge",
117
- gold_status="resolved",
118
- gold_resolution_code="refund_approved",
119
- required_requested_fields=(),
120
- required_reply_markers=(
121
- ("refund", "refunded", "reimburse"),
122
- ("duplicate charge", "charged twice", "double charge"),
123
- ("5-7 business days", "5 to 7 business days", "within 7 business days"),
124
- ),
125
- required_note_markers=(
126
- ("duplicate charge", "double charge"),
127
- ("refund", "refund approved"),
128
- ),
129
- forbidden_reply_markers=("chargeback", "security team"),
130
- risk_flags=("finance_close_risk", "avoid_unnecessary_back_and_forth"),
131
- over_escalation_queues=("trust_and_safety", "platform_engineering", "compliance_ops"),
132
- sla_step_cost=10,
133
- max_steps=6,
134
- ),
135
- "account_takeover_medium": SupportTaskSpec(
136
- task_id="account_takeover_medium",
137
- difficulty="medium",
138
- title="Suspicious login recovery triage",
139
- objective=(
140
- "Handle a potential account-compromise case, request the missing verification "
141
- "details, communicate safe next steps, and keep the case open until the customer replies. "
142
- "The agent must protect account safety without promising an unsafe immediate unlock."
143
- ),
144
- ticket=SupportTicket(
145
- customer_name="Marcus Lee",
146
- customer_tier="pro",
147
- company="Northline Analytics",
148
- subject="Locked out after strange login alert",
149
- body=(
150
- "Our workspace admin got a login alert from a country none of us have visited, "
151
- "and now I can't get back into the account. Please unlock it ASAP. The billing "
152
- "email is still ours, but I'm worried someone got in."
153
- ),
154
- region="us-east-1",
155
- affected_users=34,
156
- sla_minutes_remaining=90,
157
- business_impact="The admin is locked out of the analytics workspace ahead of the Monday executive review.",
158
- secondary_concerns=["The customer wants the account unlocked immediately, but the verification flow cannot be skipped."],
159
- attachments=[],
160
- ),
161
- knowledge_base=(
162
- KnowledgeSnippet(
163
- article_id="SEC-201",
164
- title="Account compromise routing",
165
- content=(
166
- "Potential account-takeover reports route to trust_and_safety with urgent "
167
- "priority. Do not resolve the case immediately."
168
- ),
169
- ),
170
- KnowledgeSnippet(
171
- article_id="SEC-202",
172
- title="Verification details before unlock",
173
- content=(
174
- "Before access can be restored, ask the customer for the workspace_id, the "
175
- "last successful login time, and the billing email on file. Keep the status "
176
- "waiting_on_customer until the details arrive."
177
- ),
178
- ),
179
- KnowledgeSnippet(
180
- article_id="SEC-203",
181
- title="Customer response checklist",
182
- content=(
183
- "Security replies should tell the customer to reset their password, scan "
184
- "their device for malware, and explain that the trust team is reviewing the case."
185
- ),
186
- ),
187
- ),
188
- gold_queue="trust_and_safety",
189
- gold_priority="urgent",
190
- gold_issue_type="account_compromise",
191
- gold_status="waiting_on_customer",
192
- gold_resolution_code="verification_needed",
193
- required_requested_fields=("workspace_id", "last_successful_login", "billing_email"),
194
- required_reply_markers=(
195
- ("reset your password", "change your password"),
196
- ("scan", "malware", "device check"),
197
- ("trust team", "security team", "trust and safety"),
198
- ),
199
- required_note_markers=(
200
- ("suspicious login", "strange login"),
201
- ("locked out", "can't get back", "cannot get back"),
202
- ),
203
- risk_flags=("unsafe_unlock_request", "identity_verification_required"),
204
- follow_up_outcome="partial",
205
- follow_up_message=(
206
- "Customer follow-up: workspace_id=ws_9021 and billing email confirmed, "
207
- "but they could not provide the last successful login time yet."
208
- ),
209
- follow_up_provided_fields=("workspace_id", "billing_email"),
210
- sla_step_cost=18,
211
- under_escalation_deadline_step=2,
212
- max_steps=7,
213
- ),
214
- "api_incident_hard": SupportTaskSpec(
215
- task_id="api_incident_hard",
216
- difficulty="hard",
217
- title="Production API incident escalation",
218
- objective=(
219
- "Triage a high-pressure enterprise incident, ask for the right diagnostics, notify "
220
- "the customer that engineering is engaged, and escalate instead of resolving. "
221
- "The agent must prioritize the outage over a tempting secondary compliance question."
222
- ),
223
- ticket=SupportTicket(
224
- customer_name="Asha Verma",
225
- customer_tier="enterprise",
226
- company="Kairo Health",
227
- subject="EU rollout blocked by intermittent 500s",
228
- body=(
229
- "We're launching our EU workspace tonight. Since enabling EU data residency we "
230
- "see intermittent HTTP 500 responses from /v1/exports in production. Our "
231
- "compliance lead is also asking whether this affects the audit trail, but the "
232
- "main issue is the outage. We need help immediately."
233
- ),
234
- region="eu-west-1",
235
- affected_users=1800,
236
- sla_minutes_remaining=25,
237
- business_impact="A production launch and a customer-facing compliance review are both at risk tonight if the outage persists.",
238
- secondary_concerns=["The compliance lead is asking whether audit trails are affected, but the live outage is the primary incident."],
239
- attachments=["error_screenshot.png"],
240
- ),
241
- knowledge_base=(
242
- KnowledgeSnippet(
243
- article_id="INC-301",
244
- title="Production availability incidents",
245
- content=(
246
- "Any active production 5xx incident for a paying customer routes to "
247
- "platform_engineering with urgent priority and should be escalated, not resolved."
248
- ),
249
- ),
250
- KnowledgeSnippet(
251
- article_id="INC-302",
252
- title="Minimum diagnostics for API incidents",
253
- content=(
254
- "Before engineering can investigate, request concrete examples including "
255
- "request_ids, UTC timestamps, and the affected region."
256
- ),
257
- ),
258
- KnowledgeSnippet(
259
- article_id="INC-303",
260
- title="Customer communication during an incident",
261
- content=(
262
- "The reply should acknowledge an incident, say the on-call engineering team "
263
- "is engaged, and ask for the diagnostics needed to speed investigation."
264
- ),
265
- ),
266
- KnowledgeSnippet(
267
- article_id="INC-304",
268
- title="Primary issue triage rule",
269
- content=(
270
- "When a production outage appears alongside a secondary compliance or audit "
271
- "question, resolve the live outage first and avoid treating the secondary "
272
- "question as the primary queue-driving issue."
273
- ),
274
- ),
275
- ),
276
- gold_queue="platform_engineering",
277
- gold_priority="urgent",
278
- gold_issue_type="production_incident",
279
- gold_status="escalated",
280
- gold_resolution_code="incident_opened",
281
- required_requested_fields=("request_ids", "timestamp_utc", "region"),
282
- required_reply_markers=(
283
- ("incident", "outage", "investigating"),
284
- ("on-call", "engineering team", "engineering is engaged"),
285
- ("request id", "request_ids"),
286
- ("utc", "timestamp"),
287
- ),
288
- required_note_markers=(
289
- ("eu data residency", "eu rollout"),
290
- ("500", "http 500"),
291
- ("launch tonight", "tonight"),
292
- ),
293
- risk_flags=("sev1_launch_risk", "secondary_issue_distraction", "engineering_escalation_required"),
294
- follow_up_outcome="complete",
295
- follow_up_message=(
296
- "Customer follow-up: request_ids=req_991, req_998; UTC timestamps=2026-04-01T19:20Z, "
297
- "2026-04-01T19:27Z; region=eu-west-1."
298
- ),
299
- follow_up_provided_fields=("request_ids", "timestamp_utc", "region"),
300
- sla_step_cost=25,
301
- under_escalation_deadline_step=2,
302
- max_steps=8,
303
- ),
304
- "regulated_export_exception_hard": SupportTaskSpec(
305
- task_id="regulated_export_exception_hard",
306
- difficulty="hard",
307
- title="Regulated export exception triage",
308
- objective=(
309
- "Handle a regulated export-access request where the customer wants a manual bypass. "
310
- "The agent must avoid escalating to engineering just because an export error is mentioned, "
311
- "route to compliance operations, request the right legal details, and keep the case open "
312
- "until the required approvals are supplied."
313
- ),
314
- ticket=SupportTicket(
315
- customer_name="Dr. Elena Kovacs",
316
- customer_tier="enterprise",
317
- company="NorthSea Clinics",
318
- subject="Need urgent export unlock before audit review",
319
- body=(
320
- "Our audit team needs tonight's export package, but after enabling EU residency the "
321
- "export workflow now shows a policy block. Can you temporarily disable the restriction "
322
- "or give us a manual bypass just for this audit? We are under pressure and can share "
323
- "screenshots if needed."
324
- ),
325
- region="eu-central-1",
326
- affected_users=240,
327
- sla_minutes_remaining=55,
328
- business_impact="A regulated healthcare audit review may slip tonight if the export path remains blocked.",
329
- secondary_concerns=[
330
- "The customer suggests an engineering workaround, but legal approval is the real gating factor."
331
- ],
332
- attachments=["policy_block.png"],
333
- ),
334
- knowledge_base=(
335
- KnowledgeSnippet(
336
- article_id="CMP-401",
337
- title="Export policy exceptions",
338
- content=(
339
- "Requests to bypass region or export restrictions route to compliance_ops. "
340
- "Do not promise a manual engineering workaround before approvals are verified."
341
- ),
342
- ),
343
- KnowledgeSnippet(
344
- article_id="CMP-402",
345
- title="Minimum legal details for export review",
346
- content=(
347
- "Ask for the tenant_region, dpa_amendment_id, and legal_contact_email before "
348
- "compliance can review an export exception."
349
- ),
350
- ),
351
- KnowledgeSnippet(
352
- article_id="CMP-403",
353
- title="Customer response rules for regulated exceptions",
354
- content=(
355
- "Replies must explain that no temporary bypass can be granted yet, mention the "
356
- "compliance review, and request the required legal approval details."
357
- ),
358
- ),
359
- ),
360
- gold_queue="compliance_ops",
361
- gold_priority="high",
362
- gold_issue_type="regulated_exception",
363
- gold_status="waiting_on_customer",
364
- gold_resolution_code="legal_approval_required",
365
- required_requested_fields=("tenant_region", "dpa_amendment_id", "legal_contact_email"),
366
- required_reply_markers=(
367
- ("no temporary bypass", "cannot provide a bypass", "can’t provide a bypass"),
368
- ("compliance review", "compliance team"),
369
- ("tenant_region", "tenant region"),
370
- ("dpa_amendment_id", "dpa amendment", "amendment id"),
371
- ),
372
- required_note_markers=(
373
- ("audit", "audit review"),
374
- ("eu residency", "policy block"),
375
- ("manual bypass", "workaround"),
376
- ),
377
- forbidden_reply_markers=("engineering workaround", "disable the restriction", "temporary unlock approved"),
378
- risk_flags=("regulated_data_risk", "unsafe_shortcut_pressure", "over_escalation_risk"),
379
- follow_up_outcome="incorrect",
380
- follow_up_message=(
381
- "Customer follow-up: sent a screenshot and export job ID, but did not include the DPA "
382
- "amendment ID or legal contact."
383
- ),
384
- follow_up_wrong_fields=("screenshot", "job_id"),
385
- sla_step_cost=16,
386
- over_escalation_queues=("platform_engineering",),
387
- max_steps=8,
388
- ),
389
- }
390
-
391
-
392
def get_task(task_id: str) -> SupportTaskSpec:
    """Look up the task specification registered under ``task_id``.

    Raises:
        ValueError: If ``task_id`` is not a key of ``TASKS``. The message
            lists every registered id in sorted order, and the original
            ``KeyError`` is attached as the cause.
    """

    try:
        spec = TASKS[task_id]
    except KeyError as missing:  # pragma: no cover - defensive
        # Sort the ids so the error text is deterministic regardless of
        # the order in which tasks were registered.
        known_ids = ", ".join(sorted(TASKS))
        raise ValueError(f"Unknown task_id '{task_id}'. Valid task ids: {known_ids}") from missing
    else:
        return spec
400
-
401
-
402
def list_task_ids() -> list[str]:
    """Return every registered task id as a new list.

    The ids come back in the iteration order of ``TASKS`` (for a dict,
    its insertion order), so repeated calls yield the same sequence.
    """

    # Unpacking the mapping iterates its keys and builds a fresh list the
    # caller may mutate freely.
    return [*TASKS]