Spaces:
Sleeping
Sleeping
Delete folder supportdesk_env with huggingface_hub
Browse files
- supportdesk_env/__init__.py +0 -46
- supportdesk_env/client.py +0 -38
- supportdesk_env/graders.py +0 -167
- supportdesk_env/models.py +0 -122
- supportdesk_env/openenv_compat.py +0 -76
- supportdesk_env/policies.py +0 -84
- supportdesk_env/server/__init__.py +0 -1
- supportdesk_env/server/app.py +0 -245
- supportdesk_env/server/supportdesk_environment.py +0 -545
- supportdesk_env/tasks.py +0 -405
supportdesk_env/__init__.py
DELETED
|
@@ -1,46 +0,0 @@
|
|
| 1 |
-
"""SupportDesk OpenEnv environment package."""
|
| 2 |
-
|
| 3 |
-
from supportdesk_env.graders import GradeBreakdown, grade_case, grade_task_id
|
| 4 |
-
from supportdesk_env.models import (
|
| 5 |
-
ActionHistoryEntry,
|
| 6 |
-
KnowledgeSnippet,
|
| 7 |
-
SupportCaseProgress,
|
| 8 |
-
SupportDeskAction,
|
| 9 |
-
SupportDeskObservation,
|
| 10 |
-
SupportDeskState,
|
| 11 |
-
SupportTicket,
|
| 12 |
-
)
|
| 13 |
-
from supportdesk_env.policies import default_note, default_reply, heuristic_action
|
| 14 |
-
from supportdesk_env.tasks import TASKS, SupportTaskSpec, get_task, list_task_ids
|
| 15 |
-
|
| 16 |
-
try:
|
| 17 |
-
from supportdesk_env.client import SupportDeskEnv
|
| 18 |
-
except ImportError: # pragma: no cover - local unit tests can run without openenv-core
|
| 19 |
-
SupportDeskEnv = None # type: ignore[assignment]
|
| 20 |
-
|
| 21 |
-
try:
|
| 22 |
-
from supportdesk_env.server.supportdesk_environment import SupportDeskEnvironment
|
| 23 |
-
except ImportError: # pragma: no cover - guarded for partial local setups
|
| 24 |
-
SupportDeskEnvironment = None # type: ignore[assignment]
|
| 25 |
-
|
| 26 |
-
__all__ = [
|
| 27 |
-
"ActionHistoryEntry",
|
| 28 |
-
"GradeBreakdown",
|
| 29 |
-
"KnowledgeSnippet",
|
| 30 |
-
"SupportCaseProgress",
|
| 31 |
-
"SupportDeskAction",
|
| 32 |
-
"SupportDeskEnv",
|
| 33 |
-
"SupportDeskEnvironment",
|
| 34 |
-
"SupportDeskObservation",
|
| 35 |
-
"SupportDeskState",
|
| 36 |
-
"SupportTaskSpec",
|
| 37 |
-
"SupportTicket",
|
| 38 |
-
"TASKS",
|
| 39 |
-
"default_note",
|
| 40 |
-
"default_reply",
|
| 41 |
-
"get_task",
|
| 42 |
-
"grade_case",
|
| 43 |
-
"grade_task_id",
|
| 44 |
-
"heuristic_action",
|
| 45 |
-
"list_task_ids",
|
| 46 |
-
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
supportdesk_env/client.py
DELETED
|
@@ -1,38 +0,0 @@
|
|
| 1 |
-
"""HTTP client for interacting with a deployed SupportDesk environment."""
|
| 2 |
-
|
| 3 |
-
from __future__ import annotations
|
| 4 |
-
|
| 5 |
-
from supportdesk_env.models import SupportDeskAction, SupportDeskObservation, SupportDeskState
|
| 6 |
-
from supportdesk_env.openenv_compat import EnvClient, StepResult
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
def _validate(model_cls, payload):
    """Build a ``model_cls`` instance from a decoded ``payload`` mapping.

    When the class exposes ``model_validate`` that entry point is used;
    otherwise the payload is unpacked as keyword arguments to the constructor.
    """
    if not hasattr(model_cls, "model_validate"):
        return model_cls(**payload)  # pragma: no cover - pydantic v1 fallback
    return model_cls.model_validate(payload)
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
class SupportDeskEnv(EnvClient[SupportDeskAction, SupportDeskObservation, SupportDeskState]):
    """Typed SupportDesk client built on ``EnvClient``.

    The hooks below translate between the typed models and the JSON payloads
    exchanged with the server.
    """

    def _step_payload(self, action: SupportDeskAction) -> dict:
        """Serialize ``action`` into the dict sent with a step request."""

        # Fall back to ``dict()`` for models that do not expose ``model_dump``.
        if not hasattr(action, "model_dump"):
            return action.dict()
        return action.model_dump()

    def _parse_state(self, payload) -> SupportDeskState:
        """Validate a state payload into ``SupportDeskState``."""

        return _validate(SupportDeskState, payload)

    def _parse_reset(self, payload) -> SupportDeskObservation:
        """Validate a reset payload into ``SupportDeskObservation``."""

        return _validate(SupportDeskObservation, payload)

    def _parse_result(self, payload) -> StepResult[SupportDeskObservation]:
        """Convert a step response into a ``StepResult``.

        Reads the ``observation``, ``reward`` and ``done`` keys of ``payload``;
        a missing key raises ``KeyError``.
        """

        # Only observation/reward/done are forwarded: StepResult in this
        # runtime accepts nothing else.
        fields = {
            "observation": _validate(SupportDeskObservation, payload["observation"]),
            "reward": payload["reward"],
            "done": payload["done"],
        }
        return StepResult(**fields)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
supportdesk_env/graders.py
DELETED
|
@@ -1,167 +0,0 @@
|
|
| 1 |
-
"""Deterministic graders and reward helpers for SupportDesk."""
|
| 2 |
-
|
| 3 |
-
from __future__ import annotations
|
| 4 |
-
|
| 5 |
-
import re
|
| 6 |
-
from dataclasses import dataclass
|
| 7 |
-
|
| 8 |
-
from supportdesk_env.models import SupportCaseProgress
|
| 9 |
-
from supportdesk_env.tasks import SupportTaskSpec, get_task
|
| 10 |
-
|
| 11 |
-
STRICT_SCORE_EPSILON = 0.01
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
@dataclass(frozen=True)
class GradeBreakdown:
    """Immutable per-component score report for one graded case."""

    # Aggregate score for the case.
    total_score: float
    # Individual component scores.
    queue_score: float
    priority_score: float
    issue_type_score: float
    requested_fields_score: float
    reply_score: float
    note_score: float
    status_score: float
    resolution_score: float
    # Names of the components counted as completed.
    completed_milestones: tuple[str, ...]
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
def _normalize(text: str | None) -> str:
    """Return a lowercase, punctuation-free form of ``text`` for marker matching.

    Every run of characters outside ``a-z0-9`` (punctuation, hyphens,
    underscores, any whitespace) becomes a single space, and leading/trailing
    spaces are removed. Collapsing the runs lets "refund, approved" or a line
    break between the words match the marker "refund approved".

    Args:
        text: Raw reply, note, or marker text; ``None`` and ``""`` are allowed.

    Returns:
        The normalized string, or ``""`` when ``text`` is empty or ``None``.
    """
    if not text:
        return ""
    # One pass: lowercase first so uppercase letters survive the character class.
    return re.sub(r"[^a-z0-9]+", " ", text.lower()).strip()
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
def _marker_group_score(text: str | None, marker_groups: tuple[tuple[str, ...], ...]) -> float:
    """Return the fraction of marker groups represented in ``text``.

    A group counts as matched when at least one of its markers, after
    normalization, is a substring of the normalized text. With no groups the
    score is 1.0; with groups but empty normalized text it is 0.0.
    """
    if not marker_groups:
        return 1.0

    haystack = _normalize(text)
    if not haystack:
        return 0.0

    matched_groups = sum(
        1
        for group in marker_groups
        if any(_normalize(marker) in haystack for marker in group)
    )
    return matched_groups / len(marker_groups)
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
def _requested_fields_score(case: SupportCaseProgress, task: SupportTaskSpec) -> float:
    """Score the requested fields against the fields the task requires.

    When the task requires nothing, only an empty request scores 1.0.
    Otherwise the score is the share of required fields that were requested,
    minus 0.05 per extra field (penalty capped at 0.25), floored at 0.0.
    """
    wanted = set(task.required_requested_fields)
    asked = set(case.requested_fields)

    if not wanted:
        # Any request at all is wrong when nothing is required.
        return 0.0 if asked else 1.0
    if not asked:
        return 0.0

    coverage = len(wanted & asked) / len(wanted)
    surplus_penalty = min(0.25, len(asked - wanted) * 0.05)
    return max(0.0, coverage - surplus_penalty)
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
def _reply_penalty(case: SupportCaseProgress, task: SupportTaskSpec) -> float:
    """Return 0.5 when the reply contains a forbidden marker, otherwise 0.0.

    Both the reply and each marker are normalized before the substring check.
    A reply that normalizes to an empty string is never penalized.
    """
    reply_text = _normalize(case.reply)
    if not reply_text:
        return 0.0
    for marker in task.forbidden_reply_markers:
        if _normalize(marker) in reply_text:
            return 0.5
    return 0.0
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
def _strict_open_unit_interval(score: float) -> float:
    """Clamp ``score`` to ``[STRICT_SCORE_EPSILON, 1 - STRICT_SCORE_EPSILON]``.

    The result therefore never equals 0 or 1, which the original docstring
    describes as required for evaluator compatibility.
    """
    floor = STRICT_SCORE_EPSILON
    ceiling = 1.0 - STRICT_SCORE_EPSILON
    lifted = max(floor, score)
    return min(ceiling, lifted)
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
def grade_case(task: SupportTaskSpec, case: SupportCaseProgress) -> GradeBreakdown:
    """Grade ``case`` against the gold values stored on ``task``.

    Component weights: queue 0.15, priority 0.10, issue type 0.10, requested
    fields 0.15, reply 0.25, internal note 0.10, status 0.10, resolution code
    0.05. The weighted total is clamped by ``_strict_open_unit_interval`` and
    rounded to four decimals.

    A milestone is recorded for each exact-match component that is correct and
    for each fractional component (requested fields, reply, note) scoring at
    least 0.99.
    """

    # Exact-match components are either 1.0 or 0.0.
    queue_score = float(case.queue == task.gold_queue)
    priority_score = float(case.priority == task.gold_priority)
    issue_type_score = float(case.issue_type == task.gold_issue_type)
    status_score = float(case.status == task.gold_status)
    resolution_score = float(case.resolution_code == task.gold_resolution_code)

    # Fractional components.
    requested_fields_score = _requested_fields_score(case, task)
    reply_markers_score = _marker_group_score(case.reply, task.required_reply_markers)
    # A forbidden marker reduces the reply score, but never below zero.
    reply_score = max(0.0, reply_markers_score - _reply_penalty(case, task))
    note_score = _marker_group_score(case.internal_note, task.required_note_markers)

    weighted_total = (
        queue_score * 0.15
        + priority_score * 0.10
        + issue_type_score * 0.10
        + requested_fields_score * 0.15
        + reply_score * 0.25
        + note_score * 0.10
        + status_score * 0.10
        + resolution_score * 0.05
    )

    # (milestone name, reached?) in the order milestones are reported.
    milestone_checks = (
        ("queue", bool(queue_score)),
        ("priority", bool(priority_score)),
        ("issue_type", bool(issue_type_score)),
        ("requested_fields", requested_fields_score >= 0.99),
        ("reply", reply_score >= 0.99),
        ("internal_note", note_score >= 0.99),
        ("status", bool(status_score)),
        ("resolution_code", bool(resolution_score)),
    )
    completed = tuple(name for name, reached in milestone_checks if reached)

    return GradeBreakdown(
        total_score=round(_strict_open_unit_interval(weighted_total), 4),
        queue_score=queue_score,
        priority_score=priority_score,
        issue_type_score=issue_type_score,
        requested_fields_score=round(requested_fields_score, 4),
        reply_score=round(reply_score, 4),
        note_score=round(note_score, 4),
        status_score=status_score,
        resolution_score=resolution_score,
        completed_milestones=completed,
    )
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
def grade_task_id(task_id: str, case: SupportCaseProgress) -> GradeBreakdown:
    """Look up the task for ``task_id`` via ``get_task`` and grade ``case`` against it."""

    task_spec = get_task(task_id)
    return grade_case(task_spec, case)
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
class _TaskSpecificGrader:
    """Base class for graders bound to a single task through ``task_id``.

    Subclasses only override ``task_id``. Instances are callable; calling one
    is the same as calling ``grade``.
    """

    # Overridden by each concrete grader; empty on the base class.
    task_id: str = ""

    def grade(self, case: SupportCaseProgress) -> float:
        """Return the total score of ``case`` for this grader's task."""
        breakdown = grade_task_id(self.task_id, case)
        return breakdown.total_score

    def __call__(self, case: SupportCaseProgress) -> float:
        """Delegate to ``grade`` so the instance can be used as a function."""
        return self.grade(case)
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
class BillingRefundEasyGrader(_TaskSpecificGrader):
    """Grader bound to the ``billing_refund_easy`` task."""

    task_id = "billing_refund_easy"
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
class AccountTakeoverMediumGrader(_TaskSpecificGrader):
    """Grader bound to the ``account_takeover_medium`` task."""

    task_id = "account_takeover_medium"
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
class ApiIncidentHardGrader(_TaskSpecificGrader):
    """Grader bound to the ``api_incident_hard`` task."""

    task_id = "api_incident_hard"
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
class RegulatedExportExceptionHardGrader(_TaskSpecificGrader):
    """Grader bound to the ``regulated_export_exception_hard`` task."""

    task_id = "regulated_export_exception_hard"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
supportdesk_env/models.py
DELETED
|
@@ -1,122 +0,0 @@
|
|
| 1 |
-
"""Typed models for the SupportDesk OpenEnv environment."""
|
| 2 |
-
|
| 3 |
-
from __future__ import annotations
|
| 4 |
-
|
| 5 |
-
from typing import Literal
|
| 6 |
-
|
| 7 |
-
from pydantic import BaseModel, Field
|
| 8 |
-
|
| 9 |
-
from supportdesk_env.openenv_compat import Action, Observation, State
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
class KnowledgeSnippet(BaseModel):
    """Policy or runbook excerpt made available to the agent during triage."""

    # All three fields are required strings.
    article_id: str
    title: str
    content: str
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
class SupportTicket(BaseModel):
    """Inbound support ticket that serves as the static input of a task."""

    # Required ticket fields.
    customer_name: str
    customer_tier: Literal["free", "pro", "enterprise"]
    company: str
    subject: str
    body: str
    region: str

    # Optional context; each defaults to None when not supplied.
    affected_users: int | None = None
    sla_minutes_remaining: int | None = None
    business_impact: str | None = None

    # List fields get a fresh empty list per instance.
    secondary_concerns: list[str] = Field(default_factory=list)
    attachments: list[str] = Field(default_factory=list)
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
class ActionHistoryEntry(BaseModel):
    """One short trace record carried in observations and state dumps."""

    step: int
    operation: str
    summary: str
    # Defaults to 0.0 when no reward change is recorded for the entry.
    reward_delta: float = 0.0
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
class CustomerFollowUp(BaseModel):
    """Scripted customer response that arrives after a request for more information."""

    # Starts as "none"; only the listed literals are accepted.
    status: Literal["none", "pending", "partial", "complete", "incorrect"] = "none"
    message: str | None = None
    # Both lists default to a fresh empty list per instance.
    provided_fields: list[str] = Field(default_factory=list)
    wrong_fields: list[str] = Field(default_factory=list)
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
class SupportCaseProgress(BaseModel):
    """Mutable working state of a case; this is what the graders score."""

    # Classification values; None until set.
    queue: str | None = None
    priority: str | None = None
    issue_type: str | None = None

    # Case lifecycle; status starts as "new".
    status: str = "new"
    resolution_code: str | None = None

    # Information requested from the customer and drafted texts.
    requested_fields: list[str] = Field(default_factory=list)
    reply: str | None = None
    internal_note: str | None = None

    # Defaults to a fresh CustomerFollowUp (status "none").
    customer_follow_up: CustomerFollowUp = Field(default_factory=CustomerFollowUp)
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
class SupportDeskAction(Action):
    """A single structured action submitted by the agent for one step."""

    # The only required field; selects which kind of action this is.
    operation: Literal["classify", "request_info", "draft_reply", "add_internal_note", "submit", "wait"]

    # Optional arguments; all default to None or an empty list.
    queue: str | None = None
    priority: str | None = None
    issue_type: str | None = None
    status: str | None = None
    resolution_code: str | None = None
    requested_fields: list[str] = Field(default_factory=list)
    reply: str | None = None
    internal_note: str | None = None
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
class SupportDeskObservation(Observation):
    """Observation handed to the agent after a reset and after every step."""

    # Task description.
    task_id: str
    difficulty: Literal["easy", "medium", "hard"]
    objective: str
    ticket: SupportTicket
    knowledge_base: list[KnowledgeSnippet]

    # Option lists exposed to the agent.
    available_queues: list[str]
    available_priorities: list[str]
    available_statuses: list[str]
    available_issue_types: list[str]

    # Current case progress and workflow context.
    case: SupportCaseProgress
    current_sla_minutes_remaining: int | None = None
    workflow_stage: str
    required_next_actions: list[str] = Field(default_factory=list)
    risk_flags: list[str] = Field(default_factory=list)
    action_history: list[ActionHistoryEntry] = Field(default_factory=list)
    feedback: str = ""
    remaining_steps: int = 0
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
class SupportDeskState(State):
    """Full environment state, as returned by the OpenEnv ``state()`` API."""

    # Episode identity and task.
    episode_id: str | None = None
    task_id: str
    difficulty: Literal["easy", "medium", "hard"]

    # Progress counters and scoring; all start at zero / False.
    step_count: int = 0
    reward: float = 0.0
    done: bool = False
    current_score: float = 0.0
    max_steps: int = 0

    # Case progress and workflow context.
    case: SupportCaseProgress
    current_sla_minutes_remaining: int | None = None
    workflow_stage: str
    required_next_actions: list[str] = Field(default_factory=list)
    risk_flags: list[str] = Field(default_factory=list)
    action_history: list[ActionHistoryEntry] = Field(default_factory=list)
    completed_milestones: list[str] = Field(default_factory=list)
    last_feedback: str = ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
supportdesk_env/openenv_compat.py
DELETED
|
@@ -1,76 +0,0 @@
|
|
| 1 |
-
"""Compatibility helpers for environments where openenv-core is not installed."""
|
| 2 |
-
|
| 3 |
-
from __future__ import annotations
|
| 4 |
-
|
| 5 |
-
from dataclasses import dataclass, field
|
| 6 |
-
from typing import Any, Generic, TypeVar
|
| 7 |
-
|
| 8 |
-
from pydantic import BaseModel
|
| 9 |
-
|
| 10 |
-
A = TypeVar("A")
|
| 11 |
-
O = TypeVar("O")
|
| 12 |
-
S = TypeVar("S")
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
# Stays True unless both import attempts below fail.
OPENENV_AVAILABLE = True

try:
    # First choice: the ``openenv.core`` package layout.
    from openenv.core.client_types import StepResult  # type: ignore
    from openenv.core.env_client import EnvClient  # type: ignore
    from openenv.core.env_server.interfaces import Environment  # type: ignore
    from openenv.core.env_server.types import (  # type: ignore
        Action,
        EnvironmentMetadata,
        Observation,
        State,
    )
except ImportError:
    try:
        # Second choice: the ``openenv_core`` layout, whose HTTP client has a
        # different name and is aliased to ``EnvClient``.
        from openenv_core.client_types import StepResult  # type: ignore
        from openenv_core.http_env_client import HTTPEnvClient as EnvClient  # type: ignore
        from openenv_core.env_server.interfaces import Environment  # type: ignore
        from openenv_core.env_server.types import (  # type: ignore
            Action,
            EnvironmentMetadata,
            Observation,
            State,
        )
    except ImportError:
        # Neither layout is importable: define local stand-ins so the package
        # can still be imported.
        OPENENV_AVAILABLE = False

        class Action(BaseModel):
            """Stand-in Action base class used when OpenEnv is not importable."""

        class Observation(BaseModel):
            """Stand-in Observation base class used when OpenEnv is not importable."""

            reward: float = 0.0
            done: bool = False

        class State(BaseModel):
            """Stand-in State base class used when OpenEnv is not importable."""

        class Environment(Generic[A, O, S]):
            """Bare generic base class for environments when OpenEnv is absent."""

            def __init__(self) -> None:
                super().__init__()

        class EnvironmentMetadata(BaseModel):
            """Stand-in metadata model used when OpenEnv is not importable."""

            name: str
            description: str
            readme_content: str | None = None
            version: str | None = None
            author: str | None = None

        @dataclass
        class StepResult(Generic[O]):
            """Stand-in step result holding observation, reward, done and info."""

            observation: O
            reward: float
            done: bool
            info: dict[str, Any] = field(default_factory=dict)

        class EnvClient(Generic[A, O, S]):
            """Placeholder client: importable, but raises ImportError on instantiation."""

            def __init__(self, *args, **kwargs) -> None:
                raise ImportError(
                    "SupportDeskEnv requires openenv-core to be installed. "
                    "Run `py -3 -m pip install openenv-core` to use the HTTP client."
                )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
supportdesk_env/policies.py
DELETED
|
@@ -1,84 +0,0 @@
|
|
| 1 |
-
"""Reusable policy helpers for local baselines and training examples."""
|
| 2 |
-
|
| 3 |
-
from __future__ import annotations
|
| 4 |
-
|
| 5 |
-
from supportdesk_env.models import SupportDeskAction, SupportDeskObservation
|
| 6 |
-
from supportdesk_env.tasks import get_task
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
def default_reply(task_id: str) -> str:
    """Return the canned customer reply for ``task_id``.

    Three task ids have dedicated replies; any other id receives the
    active-incident reply.
    """
    incident_reply = (
        "We are treating this as an active incident and our on-call engineering team is engaged. "
        "Please send the affected request IDs, UTC timestamps, and the impacted region so we can "
        "speed up the investigation."
    )
    replies_by_task = {
        "billing_refund_easy": (
            "Thanks for flagging the duplicate charge. I have started the refund for the extra "
            "charge, and the funds usually appear within 5-7 business days."
        ),
        "account_takeover_medium": (
            "We have escalated this to our trust team. Please reset your password, scan your "
            "device for malware, and reply with your workspace_id, last successful login time, "
            "and billing email so we can verify the account safely."
        ),
        "regulated_export_exception_hard": (
            "We cannot provide a bypass or temporary unlock yet. Our compliance team is running "
            "a compliance review, and we need your tenant_region, dpa_amendment_id, and "
            "legal_contact_email to continue that review."
        ),
    }
    return replies_by_task.get(task_id, incident_reply)
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
def default_note(task_id: str) -> str:
    """Return the canned internal note for ``task_id``.

    Three task ids have dedicated notes; any other id receives the
    EU data residency rollout note.
    """
    notes_by_task = {
        "billing_refund_easy": "Duplicate charge confirmed from attached invoice; refund approved.",
        "account_takeover_medium": "Suspicious login alert reported and customer is locked out.",
        "regulated_export_exception_hard": (
            "Audit-driven export exception request tied to an EU residency policy block; "
            "customer asked for a manual bypass before legal approval."
        ),
    }
    fallback_note = "EU data residency rollout hit intermittent HTTP 500s and the customer launches tonight."
    return notes_by_task.get(task_id, fallback_note)
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
def heuristic_action(observation: SupportDeskObservation) -> SupportDeskAction:
    """Choose the next baseline action for ``observation``.

    The checks run in a fixed order and the first unmet one decides the
    action: classify, request_info, wait, draft_reply, add_internal_note,
    submit. Gold values come from the task spec returned by ``get_task``.
    """
    spec = get_task(observation.task_id)
    progress = observation.case

    # 1. Classify until queue, priority and issue type are all set.
    classification = (progress.queue, progress.priority, progress.issue_type)
    if any(value is None for value in classification):
        return SupportDeskAction(
            operation="classify",
            queue=spec.gold_queue,
            priority=spec.gold_priority,
            issue_type=spec.gold_issue_type,
        )

    # 2. Request exactly the required fields (order-insensitive comparison).
    wanted_fields = spec.required_requested_fields
    if wanted_fields and sorted(progress.requested_fields) != sorted(wanted_fields):
        return SupportDeskAction(
            operation="request_info",
            requested_fields=list(wanted_fields),
        )

    # 3. Hold while the customer follow-up is still pending.
    if progress.customer_follow_up.status == "pending":
        return SupportDeskAction(operation="wait")

    # 4. Draft the reply, then the internal note, if either is still missing.
    if not progress.reply:
        reply_text = default_reply(observation.task_id)
        return SupportDeskAction(operation="draft_reply", reply=reply_text)

    if not progress.internal_note:
        note_text = default_note(observation.task_id)
        return SupportDeskAction(operation="add_internal_note", internal_note=note_text)

    # 5. Everything is in place: submit with the gold status and resolution.
    return SupportDeskAction(
        operation="submit",
        status=spec.gold_status,
        resolution_code=spec.gold_resolution_code,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
supportdesk_env/server/__init__.py
DELETED
|
@@ -1 +0,0 @@
|
|
| 1 |
-
"""SupportDesk server package."""
|
|
|
|
|
|
supportdesk_env/server/app.py
DELETED
|
@@ -1,245 +0,0 @@
|
|
| 1 |
-
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
-
# All rights reserved.
|
| 3 |
-
#
|
| 4 |
-
# This source code is licensed under the BSD-style license found in the
|
| 5 |
-
# LICENSE file in the root directory of this source tree.
|
| 6 |
-
|
| 7 |
-
"""
|
| 8 |
-
FastAPI application for the SupportDesk environment.
|
| 9 |
-
|
| 10 |
-
This module creates an HTTP server that exposes the SupportDeskEnvironment
|
| 11 |
-
over HTTP and WebSocket endpoints, compatible with EnvClient.
|
| 12 |
-
|
| 13 |
-
Endpoints:
|
| 14 |
-
- POST /reset: Reset the environment
|
| 15 |
-
- POST /step: Execute an action
|
| 16 |
-
- GET /state: Get current environment state
|
| 17 |
-
- GET /schema: Get action/observation schemas
|
| 18 |
-
- WS /ws: WebSocket endpoint for persistent sessions
|
| 19 |
-
- GET /tasks: Get task catalog metadata
|
| 20 |
-
|
| 21 |
-
Usage:
|
| 22 |
-
# Development (with auto-reload):
|
| 23 |
-
uvicorn supportdesk_env.server.app:app --reload --host 0.0.0.0 --port 8000
|
| 24 |
-
|
| 25 |
-
# Production:
|
| 26 |
-
uvicorn supportdesk_env.server.app:app --host 0.0.0.0 --port 8000 --workers 4
|
| 27 |
-
|
| 28 |
-
# Or run directly:
|
| 29 |
-
python -m supportdesk_env.server.app
|
| 30 |
-
"""
|
| 31 |
-
|
| 32 |
-
from __future__ import annotations
|
| 33 |
-
|
| 34 |
-
import os
|
| 35 |
-
from typing import Any
|
| 36 |
-
|
| 37 |
-
import uvicorn
|
| 38 |
-
from fastapi import Body, HTTPException
|
| 39 |
-
from fastapi.routing import APIRoute
|
| 40 |
-
|
| 41 |
-
try:
|
| 42 |
-
from openenv.core.env_server import http_server as openenv_http_server
|
| 43 |
-
except ImportError:
|
| 44 |
-
try:
|
| 45 |
-
from openenv_core.env_server import http_server as openenv_http_server
|
| 46 |
-
except Exception as e: # pragma: no cover
|
| 47 |
-
raise ImportError(
|
| 48 |
-
"openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
|
| 49 |
-
) from e
|
| 50 |
-
|
| 51 |
-
try:
|
| 52 |
-
from ..models import SupportDeskAction, SupportDeskObservation, SupportDeskState
|
| 53 |
-
from ..tasks import TASKS
|
| 54 |
-
from .supportdesk_environment import SupportDeskEnvironment
|
| 55 |
-
except ModuleNotFoundError:
|
| 56 |
-
from supportdesk_env.models import SupportDeskAction, SupportDeskObservation, SupportDeskState
|
| 57 |
-
from supportdesk_env.tasks import TASKS
|
| 58 |
-
from supportdesk_env.server.supportdesk_environment import SupportDeskEnvironment
|
| 59 |
-
|
| 60 |
-
# Bind the default OpenEnv /state route to the full typed state model.
|
| 61 |
-
openenv_http_server.State = SupportDeskState
|
| 62 |
-
create_app = openenv_http_server.create_app
|
| 63 |
-
|
| 64 |
-
# Create the app with web interface and README integration.
|
| 65 |
-
app = create_app(
|
| 66 |
-
SupportDeskEnvironment,
|
| 67 |
-
SupportDeskAction,
|
| 68 |
-
SupportDeskObservation,
|
| 69 |
-
env_name="supportdesk_env",
|
| 70 |
-
max_concurrent_envs=1, # increase this number to allow more concurrent WebSocket sessions
|
| 71 |
-
)
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
TASK_GRADER_PATHS = {
|
| 75 |
-
"billing_refund_easy": "graders:BillingRefundEasyGrader",
|
| 76 |
-
"account_takeover_medium": "graders:AccountTakeoverMediumGrader",
|
| 77 |
-
"api_incident_hard": "graders:ApiIncidentHardGrader",
|
| 78 |
-
"regulated_export_exception_hard": "graders:RegulatedExportExceptionHardGrader",
|
| 79 |
-
}
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
def _replace_route(path: str, methods: set[str]) -> None:
    """Drop matching routes from ``app`` so a replacement can be registered.

    A route is removed when it is an ``APIRoute`` whose path equals ``path``
    and whose method set contains every entry of ``methods``. Despite the
    name, this function only removes; the replacement handler is registered
    separately.
    """

    def _is_target(route) -> bool:
        if not isinstance(route, APIRoute):
            return False
        if route.path != path:
            return False
        return methods.issubset(set(route.methods or set()))

    app.router.routes = [route for route in app.router.routes if not _is_target(route)]
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
def _score_response(env: SupportDeskEnvironment, observation: SupportDeskObservation) -> dict[str, Any]:
    """Build the response body: observation, reward, done, plus a top-level score.

    ``reward`` and ``done`` are read from the observation; ``score`` is read
    from ``env.state.current_score``.
    """
    body: dict[str, Any] = {
        "observation": observation.model_dump(),
        "reward": observation.reward,
        "done": observation.done,
    }
    body["score"] = env.state.current_score
    return body
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
_replace_route("/reset", {"POST"})
|
| 108 |
-
_replace_route("/step", {"POST"})
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
@app.post("/reset")
async def reset_with_score(
    request: openenv_http_server.ResetRequest = Body(default_factory=openenv_http_server.ResetRequest),
) -> dict[str, Any]:
    """Handle ``POST /reset`` and return the first observation with its score.

    Only the request fields the caller actually set are forwarded to
    ``env.reset``. The environment instance created for the request is closed
    whether or not the reset succeeds.
    """
    env = SupportDeskEnvironment()
    try:
        reset_kwargs = request.model_dump(exclude_unset=True)
        first_observation = env.reset(**reset_kwargs)
        response = _score_response(env, first_observation)
    finally:
        env.close()
    return response
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
@app.post("/step")
async def step_with_score(request: openenv_http_server.StepRequest) -> dict[str, Any]:
    """Handle ``POST /step`` and return the resulting observation with its score.

    Raises:
        HTTPException: status 422 carrying the validation errors when the
            action payload cannot be deserialized into ``SupportDeskAction``.
    """
    raw_action = request.action
    try:
        action = openenv_http_server.deserialize_action(raw_action, SupportDeskAction)
    except openenv_http_server.ValidationError as exc:
        raise HTTPException(status_code=422, detail=exc.errors()) from exc

    env = SupportDeskEnvironment()
    try:
        # Forward every explicitly set request field except the action itself.
        step_kwargs = request.model_dump(exclude_unset=True, exclude={"action"})
        next_observation = env.step(action, **step_kwargs)
        response = _score_response(env, next_observation)
    finally:
        env.close()
    return response
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
@app.get("/tasks")
def list_tasks() -> dict[str, Any]:
    """Return the task catalog: environment info, task count, and one entry per task.

    Returns:
        A dict with ``environment`` (static descriptive fields), ``total_tasks``
        (the size of ``TASKS``) and ``tasks`` (one summary dict per task, in the
        iteration order of ``TASKS.values()``).
    """
    catalog: list[dict[str, Any]] = []
    for spec in TASKS.values():
        ticket = spec.ticket
        catalog.append(
            {
                "task_id": spec.task_id,
                "grader": TASK_GRADER_PATHS[spec.task_id],
                "title": spec.title,
                "difficulty": spec.difficulty,
                "objective": spec.objective,
                "max_steps": spec.max_steps,
                "gold_issue_type": spec.gold_issue_type,
                "gold_queue": spec.gold_queue,
                "gold_priority": spec.gold_priority,
                "ticket_context": {
                    "customer_tier": ticket.customer_tier,
                    "region": ticket.region,
                    "affected_users": ticket.affected_users,
                    "sla_minutes_remaining": ticket.sla_minutes_remaining,
                },
            }
        )

    environment_info = {
        "name": "supportdesk_env",
        "version": "0.1.0",
        "grader_type": "deterministic",
        "score_range": [0.0, 1.0],
    }
    return {
        "environment": environment_info,
        "total_tasks": len(TASKS),
        "tasks": catalog,
    }
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
@app.get("/episodes/{episode_id}/state", response_model=SupportDeskState)
def get_episode_state(episode_id: str) -> SupportDeskState:
    """Return the stored state of the episode identified by ``episode_id``.

    Args:
        episode_id: Identifier of the episode to inspect.

    Raises:
        HTTPException: 404 when the environment reports the episode id as unknown.
    """
    try:
        snapshot = SupportDeskEnvironment.state_for_episode(episode_id)
    except ValueError as exc:
        # state_for_episode signals an unknown id with ValueError.
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    return snapshot
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
@app.post("/episodes/{episode_id}/step")
def step_episode(
    episode_id: str,
    payload: dict[str, Any] = Body(...),
) -> dict[str, Any]:
    """Apply one action to an explicitly addressed episode.

    Unlike ``/step``, the target episode comes from the URL, so the caller does
    not depend on which episode was touched most recently.

    Args:
        episode_id: Identifier of the episode to advance.
        payload: JSON body; must contain an ``action`` object and may contain
            ``timeout_s``.

    Returns:
        A dict with ``observation``, ``reward``, ``done`` and ``score``, where
        ``score`` is read from the stored state of ``episode_id``.

    Raises:
        HTTPException: 422 when ``action`` is missing, is not an object, or
            fails validation; 404 when the environment rejects ``episode_id``.
    """
    action_payload = payload.get("action")
    if not isinstance(action_payload, dict):
        raise HTTPException(status_code=422, detail="Request body must include an 'action' object.")

    # Validate separately from stepping: pydantic's ValidationError is a
    # ValueError, so sharing one handler would report a malformed action as
    # 404 instead of 422 (the status the /step handler uses).
    try:
        action = SupportDeskAction.model_validate(action_payload)
    except openenv_http_server.ValidationError as exc:
        raise HTTPException(status_code=422, detail=exc.errors()) from exc
    except ValueError as exc:
        # Fallback in case the re-exported ValidationError is a different class.
        raise HTTPException(status_code=422, detail=str(exc)) from exc

    env = SupportDeskEnvironment()
    try:
        observation = env.step(action, timeout_s=payload.get("timeout_s"), episode_id=episode_id)
    except ValueError as exc:
        # step() raises ValueError for an episode id it does not know.
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    finally:
        # Release the environment on every path, as the other handlers do.
        env.close()

    return {
        "observation": observation.model_dump(),
        "reward": observation.reward,
        "done": observation.done,
        "score": SupportDeskEnvironment.state_for_episode(episode_id).current_score,
    }
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
def main(host: str = "0.0.0.0", port: int = 8000) -> None:
    """Serve the application with uvicorn, without needing Docker.

    Typical invocations:
        uv run --project . server
        uv run --project . server --port 8001
        python -m supportdesk_env.server.app

    Args:
        host: Address to bind to (default: "0.0.0.0").
        port: Port to listen on (default: 8000).

    For production deployments, consider running uvicorn directly with several
    workers:
        uvicorn supportdesk_env.server.app:app --workers 4
    """
    # The app is passed as an import string rather than as an object.
    app_import_path = "supportdesk_env.server.app:app"
    uvicorn.run(app_import_path, host=host, port=port)
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
if __name__ == "__main__":
    import argparse

    # Command-line flags win; the HOST and PORT environment variables supply
    # the defaults, falling back to 0.0.0.0:8000.
    default_host = os.getenv("HOST", "0.0.0.0")
    default_port = int(os.getenv("PORT", "8000"))

    cli = argparse.ArgumentParser()
    cli.add_argument("--host", default=default_host)
    cli.add_argument("--port", type=int, default=default_port)
    options = cli.parse_args()
    main(host=options.host, port=options.port)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
supportdesk_env/server/supportdesk_environment.py
DELETED
|
@@ -1,545 +0,0 @@
|
|
| 1 |
-
"""SupportDesk environment implementation."""
|
| 2 |
-
|
| 3 |
-
from __future__ import annotations
|
| 4 |
-
|
| 5 |
-
import os
|
| 6 |
-
import threading
|
| 7 |
-
import uuid
|
| 8 |
-
from pathlib import Path
|
| 9 |
-
from typing import ClassVar
|
| 10 |
-
|
| 11 |
-
from supportdesk_env.graders import grade_case
|
| 12 |
-
from supportdesk_env.models import (
|
| 13 |
-
ActionHistoryEntry,
|
| 14 |
-
CustomerFollowUp,
|
| 15 |
-
SupportCaseProgress,
|
| 16 |
-
SupportDeskAction,
|
| 17 |
-
SupportDeskObservation,
|
| 18 |
-
SupportDeskState,
|
| 19 |
-
)
|
| 20 |
-
from supportdesk_env.openenv_compat import Environment, EnvironmentMetadata
|
| 21 |
-
from supportdesk_env.tasks import (
|
| 22 |
-
ALL_ISSUE_TYPES,
|
| 23 |
-
ALL_PRIORITIES,
|
| 24 |
-
ALL_QUEUES,
|
| 25 |
-
ALL_STATUSES,
|
| 26 |
-
SupportTaskSpec,
|
| 27 |
-
get_task,
|
| 28 |
-
list_task_ids,
|
| 29 |
-
)
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
class SupportDeskEnvironment(
    Environment[SupportDeskAction, SupportDeskObservation, SupportDeskState]
):
    """A realistic customer support triage environment with dense rewards.

    Episode snapshots are kept in class-level dictionaries guarded by
    ``_state_lock``. Any instance can therefore continue an episode that a
    different instance started, either by passing its ``episode_id`` or by
    falling back to the most recently reset/persisted episode.
    """

    # Re-entrant lock guarding the class-level stores below.
    _state_lock: ClassVar[threading.RLock] = threading.RLock()
    # Last stored snapshot per episode id.
    _episode_store: ClassVar[dict[str, SupportDeskState]] = {}
    # Task id recorded for each episode id.
    _episode_task_ids: ClassVar[dict[str, str]] = {}
    # Episode most recently reset or persisted; used when no id is supplied.
    _latest_episode_id: ClassVar[str | None] = None
    # Number of resets performed without an explicit task (drives task rotation).
    _shared_reset_counter: ClassVar[int] = 0

    def __init__(self, task_id: str | None = None):
        """Create an environment, optionally pinned to one task.

        Args:
            task_id: Task to use. When omitted, the ``SUPPORTDESK_TASK_ID``
                environment variable is consulted, then the first known task.
        """
        super().__init__()
        env_task_id = os.getenv("SUPPORTDESK_TASK_ID")
        self._explicit_task_id = task_id is not None or env_task_id is not None
        requested_task = task_id or env_task_id or list_task_ids()[0]
        # Remember the pinned task so reset() can restore it even after
        # _load_episode() has swapped self.task for another episode's task.
        self._pinned_task_id: str | None = requested_task if self._explicit_task_id else None
        self.task: SupportTaskSpec = get_task(requested_task)
        self._max_steps = self.task.max_steps
        self._step_count = 0
        self._reward_total = 0.0
        self._done = False
        self._last_feedback = ""
        self._history: list[ActionHistoryEntry] = []
        self._case = SupportCaseProgress()
        self._episode_id: str | None = None
        # Episode this instance last reset or stepped; `state` prefers it over
        # the class-wide "latest" episode.
        self._bound_episode_id: str | None = None
        self._current_sla_minutes_remaining = self.task.ticket.sla_minutes_remaining
        initial_grade = grade_case(self.task, self._case)
        self._score = initial_grade.total_score
        self._completed_milestones = list(initial_grade.completed_milestones)

    @classmethod
    def _build_initial_state(cls, task: SupportTaskSpec, episode_id: str) -> SupportDeskState:
        """Return the step-zero snapshot for ``task`` under ``episode_id``."""
        initial_case = SupportCaseProgress()
        initial_grade = grade_case(task, initial_case)
        return SupportDeskState(
            episode_id=episode_id,
            task_id=task.task_id,
            difficulty=task.difficulty,
            step_count=0,
            reward=0.0,
            done=False,
            current_score=initial_grade.total_score,
            max_steps=task.max_steps,
            case=initial_case,
            current_sla_minutes_remaining=task.ticket.sla_minutes_remaining,
            workflow_stage="intake",
            required_next_actions=["classify"],
            risk_flags=[],
            action_history=[],
            completed_milestones=list(initial_grade.completed_milestones),
            last_feedback="New case loaded. Review the ticket and policy snippets before acting.",
        )

    @classmethod
    def _extract_episode_id(cls, episode_id: str | None = None, **kwargs) -> str | None:
        """Pick an episode id from the explicit argument or from ``kwargs``.

        The explicit ``episode_id`` wins; otherwise the first non-empty string
        found under the ``episode_id`` or ``request_id`` key is used.
        """
        if episode_id:
            return episode_id
        for key in ("episode_id", "request_id"):
            value = kwargs.get(key)
            if isinstance(value, str) and value:
                return value
        return None

    def _load_episode(self, episode_id: str | None = None, **kwargs) -> None:
        """Copy a stored episode snapshot into this instance's working fields.

        Falls back to the most recently reset/persisted episode when no id can
        be resolved, and does nothing when no episode exists at all.

        Raises:
            ValueError: If the resolved episode id is not in the store.
        """
        resolved_episode_id = self._extract_episode_id(episode_id, **kwargs) or self.__class__._latest_episode_id
        if not resolved_episode_id:
            return

        episode_state = self.__class__._episode_store.get(resolved_episode_id)
        if episode_state is None:
            raise ValueError(
                f"Unknown episode_id '{resolved_episode_id}'. Call reset() first or provide a valid episode_id."
            )

        task = get_task(self.__class__._episode_task_ids.get(resolved_episode_id, episode_state.task_id))
        self.task = task
        self._max_steps = episode_state.max_steps
        self._step_count = episode_state.step_count
        self._reward_total = episode_state.reward
        self._done = episode_state.done
        self._last_feedback = episode_state.last_feedback
        # Deep copies keep the stored snapshot isolated from in-place edits.
        self._history = [entry.model_copy(deep=True) for entry in episode_state.action_history]
        self._case = episode_state.case.model_copy(deep=True)
        self._episode_id = resolved_episode_id
        self._score = episode_state.current_score
        self._completed_milestones = list(episode_state.completed_milestones)
        self._current_sla_minutes_remaining = episode_state.current_sla_minutes_remaining

    def _snapshot_state(self) -> SupportDeskState:
        """Build a ``SupportDeskState`` from this instance's working fields."""
        return SupportDeskState(
            episode_id=self._episode_id,
            task_id=self.task.task_id,
            difficulty=self.task.difficulty,
            step_count=self._step_count,
            reward=round(self._reward_total, 4),
            done=self._done,
            current_score=round(self._score, 4),
            max_steps=self._max_steps,
            case=self._case.model_copy(deep=True),
            current_sla_minutes_remaining=self._current_sla_minutes_remaining,
            workflow_stage=self._workflow_stage(),
            required_next_actions=self._required_next_actions(),
            risk_flags=self._risk_flags(),
            action_history=[entry.model_copy(deep=True) for entry in self._history],
            completed_milestones=list(self._completed_milestones),
            last_feedback=self._last_feedback,
        )

    def _persist_episode(self) -> None:
        """Store the current working fields as the episode's snapshot.

        Also marks the episode as the most recent one. Does nothing when the
        instance is not attached to an episode.
        """
        if self._episode_id is None:
            return
        self.__class__._episode_store[self._episode_id] = self._snapshot_state()
        self.__class__._episode_task_ids[self._episode_id] = self.task.task_id
        self.__class__._latest_episode_id = self._episode_id

    @property
    def state(self) -> SupportDeskState:
        """Current state of the episode this instance is working on.

        Prefers the episode this instance last reset or stepped. An instance
        that has done neither falls back to the most recently reset/persisted
        episode. Without the preference, a step on an already-finished episode
        (which persists nothing) would be followed by a state read describing
        whichever episode happened to be the latest.
        """
        with self.__class__._state_lock:
            self._load_episode(self._bound_episode_id)
            return self._snapshot_state()

    def reset(
        self,
        seed: int | None = None,
        episode_id: str | None = None,
        **kwargs,
    ) -> SupportDeskObservation:
        """Start a new episode and return its initial observation.

        Args:
            seed: Accepted for interface compatibility; not used here.
            episode_id: Id for the new episode. When omitted, one is generated
                from the task id and a random suffix.
            **kwargs: Ignored.

        Returns:
            The initial observation, with reward 0.0 and ``done`` False.
        """
        with self.__class__._state_lock:
            if self._explicit_task_id:
                # Restore the pinned task: an earlier _load_episode() may have
                # replaced self.task with the task of an unrelated episode.
                self.task = get_task(self._pinned_task_id)
            else:
                # No pinned task: rotate through the catalog using a counter
                # shared by all instances.
                task_ids = list_task_ids()
                next_task_id = task_ids[self.__class__._shared_reset_counter % len(task_ids)]
                self.__class__._shared_reset_counter += 1
                self.task = get_task(next_task_id)
            self._max_steps = self.task.max_steps
            self._episode_id = episode_id or f"{self.task.task_id}-{uuid.uuid4().hex[:8]}"
            self._bound_episode_id = self._episode_id
            initial_state = self.__class__._build_initial_state(self.task, self._episode_id)
            self.__class__._episode_store[self._episode_id] = initial_state
            self.__class__._episode_task_ids[self._episode_id] = self.task.task_id
            self.__class__._latest_episode_id = self._episode_id
            self._load_episode(self._episode_id)
            return self._build_observation(reward=0.0, done=False)

    def step(
        self,
        action: SupportDeskAction,
        timeout_s: float | None = None,
        episode_id: str | None = None,
        **kwargs,
    ) -> SupportDeskObservation:
        """Apply ``action`` to an episode and return the resulting observation.

        The reward is the change in deterministic grade, plus process bonuses
        and penalties, rounded to four decimals.

        Args:
            action: The action to apply.
            timeout_s: Accepted for interface compatibility; not used here.
            episode_id: Episode to advance. When omitted, ``kwargs`` and then
                the most recent episode are consulted.
            **kwargs: May carry ``episode_id`` or ``request_id``.

        Returns:
            The observation after the action. For an already-finished episode,
            an observation with reward -0.05 and nothing else changed.

        Raises:
            ValueError: If the resolved episode id is unknown.
        """
        with self.__class__._state_lock:
            self._load_episode(episode_id, **kwargs)
            # Bind to the episode just loaded so a following `state` read
            # describes this episode even if nothing is persisted below.
            self._bound_episode_id = self._episode_id

            if self._done:
                return self._build_observation(
                    reward=-0.05,
                    done=True,
                    feedback="Episode already finished. Call reset() before taking more actions.",
                )

            previous_grade = grade_case(self.task, self._case)
            previous_stage = self._workflow_stage()
            self._apply_action(action)
            self._step_count += 1
            self._advance_external_events(action)
            self._degrade_sla()

            current_grade = grade_case(self.task, self._case)
            reward = current_grade.total_score - previous_grade.total_score
            reward += self._process_bonus(action, previous_stage, current_grade.total_score)
            reward += self._action_penalty(
                action,
                current_grade.total_score,
                previous_grade.total_score,
            )
            reward = round(reward, 4)

            self._score = current_grade.total_score
            self._completed_milestones = list(current_grade.completed_milestones)

            if action.operation == "submit":
                self._done = True
                self._last_feedback = (
                    "Case submitted. Final deterministic grade is "
                    f"{current_grade.total_score:.2f}."
                )
            elif self._step_count >= self._max_steps:
                self._done = True
                self._last_feedback = (
                    f"Reached max steps ({self._max_steps}). Final deterministic grade is "
                    f"{current_grade.total_score:.2f}."
                )
            else:
                self._last_feedback = self._build_feedback(current_grade, reward)

            self._reward_total = round(self._reward_total + reward, 4)
            self._history.append(
                ActionHistoryEntry(
                    step=self._step_count,
                    operation=action.operation,
                    summary=self._summarize_action(action),
                    reward_delta=reward,
                )
            )
            self._persist_episode()

            return self._build_observation(reward=reward, done=self._done)

    @classmethod
    def state_for_episode(cls, episode_id: str) -> SupportDeskState:
        """Return a deep copy of the stored snapshot for ``episode_id``.

        Raises:
            ValueError: If no episode with that id has been stored.
        """
        with cls._state_lock:
            state = cls._episode_store.get(episode_id)
            if state is None:
                raise ValueError(f"Unknown episode_id '{episode_id}'. Call reset() first.")
            return state.model_copy(deep=True)

    def close(self) -> None:
        """No-op close hook for compatibility with local scripts."""

    def get_metadata(self) -> EnvironmentMetadata:
        """Return richer metadata for docs, validators, and HF Space UI.

        The README two directories above this file is embedded when it exists;
        otherwise ``readme_content`` is ``None``.
        """
        readme_path = Path(__file__).resolve().parents[2] / "README.md"
        readme_content = readme_path.read_text(encoding="utf-8") if readme_path.exists() else None
        return EnvironmentMetadata(
            name="supportdesk_env",
            description=(
                "A policy-heavy enterprise operations desk with deterministic grading, delayed "
                "customer follow-ups, SLA pressure, escalation tradeoffs, and sharper cross-functional triage."
            ),
            readme_content=readme_content,
            version="0.1.0",
            author="HyperBrick",
        )

    def _apply_action(self, action: SupportDeskAction) -> None:
        """Write the fields relevant to ``action.operation`` into the case.

        Fields that do not belong to the operation are ignored, and ``None``
        values never overwrite existing case data.
        """
        if action.operation == "classify":
            if action.queue is not None:
                self._case.queue = action.queue
            if action.priority is not None:
                self._case.priority = action.priority
            if action.issue_type is not None:
                self._case.issue_type = action.issue_type
            return

        if action.operation == "request_info":
            if action.requested_fields:
                # Union with earlier requests; stored sorted and de-duplicated.
                merged = set(self._case.requested_fields)
                merged.update(action.requested_fields)
                self._case.requested_fields = sorted(merged)
                # The first request opens a pending follow-up, but only for
                # tasks that define a follow-up outcome.
                if self.task.follow_up_outcome != "none" and self._case.customer_follow_up.status == "none":
                    self._case.customer_follow_up = CustomerFollowUp(status="pending")
            return

        if action.operation == "draft_reply":
            if action.reply is not None:
                self._case.reply = action.reply
            return

        if action.operation == "add_internal_note":
            if action.internal_note is not None:
                self._case.internal_note = action.internal_note
            return

        if action.operation == "submit":
            if action.status is not None:
                self._case.status = action.status
            if action.resolution_code is not None:
                self._case.resolution_code = action.resolution_code

    def _advance_external_events(self, action: SupportDeskAction) -> None:
        """Deliver the task's scripted customer follow-up on a ``wait`` action.

        Only applies while a follow-up is pending.
        """
        if self._case.customer_follow_up.status == "pending" and action.operation == "wait":
            self._case.customer_follow_up = CustomerFollowUp(
                status=self.task.follow_up_outcome,
                message=self.task.follow_up_message or None,
                provided_fields=list(self.task.follow_up_provided_fields),
                wrong_fields=list(self.task.follow_up_wrong_fields),
            )

    def _degrade_sla(self) -> None:
        """Subtract the task's per-step SLA cost, never going below zero."""
        if self._current_sla_minutes_remaining is None:
            return
        self._current_sla_minutes_remaining = max(
            0,
            self._current_sla_minutes_remaining - self.task.sla_step_cost,
        )

    def _action_penalty(
        self,
        action: SupportDeskAction,
        current_score: float,
        previous_score: float,
    ) -> float:
        """Return the (non-positive) penalty for the action just applied.

        Called after the action, step counter and SLA have been updated, so the
        checks below see the post-action case.
        """
        penalty = 0.0
        # The grade did not improve.
        if current_score <= previous_score:
            penalty -= 0.03
        penalty -= self._mixed_action_penalty(action)
        penalty -= self._escalation_tradeoff_penalty()
        # Operations sent without the payload they exist to carry.
        if action.operation == "draft_reply" and not action.reply:
            penalty -= 0.03
        if action.operation == "request_info" and not action.requested_fields:
            penalty -= 0.03
        if action.operation == "add_internal_note" and not action.internal_note:
            penalty -= 0.03
        if action.operation == "classify" and not any(
            [action.queue, action.priority, action.issue_type, action.status, action.resolution_code]
        ):
            penalty -= 0.03
        # Waiting when no customer follow-up is pending.
        if action.operation == "wait" and self._case.customer_follow_up.status != "pending":
            penalty -= 0.02
        # Submitting while required actions are still outstanding.
        if action.operation == "submit" and self._required_next_actions():
            penalty -= 0.08
        # Queue or priority still wrong at or after the escalation deadline.
        if (
            self.task.under_escalation_deadline_step is not None
            and self._step_count >= self.task.under_escalation_deadline_step
            and (self._case.queue != self.task.gold_queue or self._case.priority != self.task.gold_priority)
        ):
            penalty -= 0.04
        if self._current_sla_minutes_remaining is not None and self._current_sla_minutes_remaining <= 15:
            penalty -= 0.02
        return round(penalty, 4)

    def _build_feedback(self, grade, reward: float) -> str:
        """Compose the one-line progress summary shown after a non-terminal step."""
        return (
            f"Reward delta {reward:+.2f}. Current score {grade.total_score:.2f}. "
            f"SLA remaining: {self._current_sla_minutes_remaining if self._current_sla_minutes_remaining is not None else 'n/a'} minutes. "
            f"Stage: {self._workflow_stage()}. "
            f"Customer follow-up: {self._case.customer_follow_up.status}. "
            f"Next actions: {', '.join(self._required_next_actions()) or 'none'}. "
            f"Completed milestones: {', '.join(grade.completed_milestones) or 'none yet'}."
        )

    def _summarize_action(self, action: SupportDeskAction) -> str:
        """Return a compact ``" | "``-joined summary of the action's populated fields.

        Reply and note text are reduced to a ``yes`` marker.
        """
        parts = [action.operation]
        if action.queue:
            parts.append(f"queue={action.queue}")
        if action.priority:
            parts.append(f"priority={action.priority}")
        if action.issue_type:
            parts.append(f"issue_type={action.issue_type}")
        if action.status:
            parts.append(f"status={action.status}")
        if action.resolution_code:
            parts.append(f"resolution={action.resolution_code}")
        if action.requested_fields:
            parts.append(f"requested={','.join(action.requested_fields)}")
        if action.reply:
            parts.append("reply=yes")
        if action.internal_note:
            parts.append("note=yes")
        return " | ".join(parts)

    def _build_observation(
        self,
        reward: float,
        done: bool,
        feedback: str | None = None,
    ) -> SupportDeskObservation:
        """Build an observation from the working fields.

        Args:
            reward: Reward to report for this step.
            done: Whether the episode is finished.
            feedback: Message to show; the last stored feedback is used when
                this is ``None`` or empty.
        """
        return SupportDeskObservation(
            task_id=self.task.task_id,
            difficulty=self.task.difficulty,
            objective=self.task.objective,
            ticket=self.task.ticket,
            knowledge_base=list(self.task.knowledge_base),
            available_queues=list(ALL_QUEUES),
            available_priorities=list(ALL_PRIORITIES),
            available_statuses=list(ALL_STATUSES),
            available_issue_types=list(ALL_ISSUE_TYPES),
            case=self._case.model_copy(deep=True),
            current_sla_minutes_remaining=self._current_sla_minutes_remaining,
            workflow_stage=self._workflow_stage(),
            required_next_actions=self._required_next_actions(),
            risk_flags=self._risk_flags(),
            action_history=[entry.model_copy(deep=True) for entry in self._history],
            feedback=feedback or self._last_feedback,
            remaining_steps=max(self._max_steps - self._step_count, 0),
            reward=reward,
            done=done,
        )

    def _workflow_stage(self) -> str:
        """Name the first workflow stage whose exit condition is not yet met."""
        if self._done:
            return "closed"
        if self._case.queue is None or self._case.priority is None or self._case.issue_type is None:
            return "intake"
        if self.task.required_requested_fields and sorted(self._case.requested_fields) != sorted(self.task.required_requested_fields):
            return "verification"
        if self._case.customer_follow_up.status == "pending":
            return "awaiting_customer"
        if self._case.customer_follow_up.status in {"partial", "incorrect"}:
            return "follow_up_review"
        if not self._case.reply:
            return "customer_communication"
        if not self._case.internal_note:
            return "internal_handoff"
        if self._case.status != self.task.gold_status or self._case.resolution_code != self.task.gold_resolution_code:
            return "final_resolution"
        return "ready_to_submit"

    def _required_next_actions(self) -> list[str]:
        """List the operations still needed; an empty list means nothing is outstanding.

        Classification, the information request and waiting are each reported
        alone; the remaining operations are reported together.
        """
        if self._case.queue is None or self._case.priority is None or self._case.issue_type is None:
            return ["classify"]
        if self.task.required_requested_fields and sorted(self._case.requested_fields) != sorted(self.task.required_requested_fields):
            return ["request_info"]
        if self._case.customer_follow_up.status == "pending":
            return ["wait"]
        needed: list[str] = []
        if not self._case.reply:
            needed.append("draft_reply")
        if not self._case.internal_note:
            needed.append("add_internal_note")
        if self._case.status != self.task.gold_status or self._case.resolution_code != self.task.gold_resolution_code:
            needed.append("submit")
        return needed

    def _risk_flags(self) -> list[str]:
        """Return the task's static risk flags plus situational ones, sorted and unique."""
        flags = list(self.task.risk_flags)
        if self._current_sla_minutes_remaining is not None and self._current_sla_minutes_remaining <= 30:
            flags.append("sla_breach_risk")
        if self.task.ticket.affected_users and self.task.ticket.affected_users >= 1000:
            flags.append("high_customer_impact")
        if self.task.ticket.secondary_concerns:
            flags.append("secondary_issue_present")
        if self._case.customer_follow_up.status == "partial":
            flags.append("customer_reply_incomplete")
        if self._case.customer_follow_up.status == "incorrect":
            flags.append("customer_reply_irrelevant")
        return sorted(set(flags))

    def _process_bonus(
        self,
        action: SupportDeskAction,
        previous_stage: str,
        current_score: float,
    ) -> float:
        """Return the (non-negative) bonus for good process in the step just applied.

        Args:
            action: The action that was applied.
            previous_stage: Workflow stage before the action.
            current_score: Deterministic grade after the action.
        """
        bonus = 0.0
        stage_rank = {
            "intake": 0,
            "verification": 1,
            "awaiting_customer": 2,
            "follow_up_review": 3,
            "customer_communication": 4,
            "internal_handoff": 5,
            "final_resolution": 6,
            "ready_to_submit": 7,
            "closed": 8,
        }
        current_stage = self._workflow_stage()
        # The action moved the case to a later workflow stage.
        if stage_rank.get(current_stage, 0) > stage_rank.get(previous_stage, 0):
            bonus += 0.02
        # Correct queue and priority on the very first step.
        if action.operation == "classify" and self._step_count == 1:
            if self._case.queue == self.task.gold_queue and self._case.priority == self.task.gold_priority:
                bonus += 0.03
        if action.operation == "request_info" and current_score > 0 and self.task.required_requested_fields:
            bonus += 0.02
        # A wait that produced a customer follow-up.
        if action.operation == "wait" and self._case.customer_follow_up.status in {"partial", "complete", "incorrect"}:
            bonus += 0.02
        if action.operation == "submit" and not self._required_next_actions():
            bonus += 0.03
        # Urgent cases routed to the right queue within two steps, before the SLA runs out.
        if self._current_sla_minutes_remaining is not None and self._current_sla_minutes_remaining > 0:
            if self.task.gold_priority == "urgent" and self._step_count <= 2 and self._case.queue == self.task.gold_queue:
                bonus += 0.02
        return round(bonus, 4)

    def _mixed_action_penalty(self, action: SupportDeskAction) -> float:
        """Return a positive penalty for fields that do not belong to the operation.

        Each populated foreign field costs 0.02, capped at 0.06. ``None``, empty
        lists and empty strings count as unpopulated.
        """
        allowed_fields = {
            "classify": {"queue", "priority", "issue_type"},
            "request_info": {"requested_fields"},
            "draft_reply": {"reply"},
            "add_internal_note": {"internal_note"},
            "submit": {"status", "resolution_code"},
            "wait": set(),
        }
        populated_fields = {
            "queue": action.queue,
            "priority": action.priority,
            "issue_type": action.issue_type,
            "status": action.status,
            "resolution_code": action.resolution_code,
            "requested_fields": action.requested_fields,
            "reply": action.reply,
            "internal_note": action.internal_note,
        }
        extras = 0
        for field_name, value in populated_fields.items():
            if field_name in allowed_fields[action.operation]:
                continue
            if value is None:
                continue
            if isinstance(value, list) and not value:
                continue
            if isinstance(value, str) and not value:
                continue
            extras += 1
        return min(0.06, extras * 0.02)

    def _escalation_tradeoff_penalty(self) -> float:
        """Return a positive penalty when the case is in an over-escalation queue.

        Applies only when that queue is not the task's gold queue.
        """
        penalty = 0.0
        if self._case.queue in self.task.over_escalation_queues and self._case.queue != self.task.gold_queue:
            penalty += 0.06
        return round(penalty, 4)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
supportdesk_env/tasks.py
DELETED
|
@@ -1,405 +0,0 @@
|
|
| 1 |
-
"""Task registry for the SupportDesk environment."""
|
| 2 |
-
|
| 3 |
-
from __future__ import annotations
|
| 4 |
-
|
| 5 |
-
from dataclasses import dataclass
|
| 6 |
-
from typing import Literal
|
| 7 |
-
|
| 8 |
-
from supportdesk_env.models import KnowledgeSnippet, SupportTicket
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
# Closed vocabularies used by the task definitions in this module.

# Routing destinations a ticket can be assigned to.
ALL_QUEUES = [
    "billing_ops",
    "trust_and_safety",
    "platform_engineering",
    "compliance_ops",
    "general_support",
]

# Priority labels, listed from lowest to highest.
ALL_PRIORITIES = [
    "low",
    "normal",
    "high",
    "urgent",
]

# Case status values.
ALL_STATUSES = [
    "new",
    "waiting_on_customer",
    "resolved",
    "escalated",
]

# Issue-type labels a ticket can be classified as.
ALL_ISSUE_TYPES = [
    "duplicate_charge",
    "account_compromise",
    "production_incident",
    "regulated_exception",
    "general_question",
]
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
@dataclass(frozen=True)
class SupportTaskSpec:
    """Immutable definition of a single support triage task.

    Instances are frozen dataclasses, so fields cannot be reassigned after
    construction. Field order is part of the constructor interface: every
    field up to ``required_note_markers`` has no default and must be
    supplied; the remaining fields are optional.
    """

    # --- Identity and scenario -------------------------------------------
    task_id: str
    difficulty: Literal["easy", "medium", "hard"]
    title: str
    objective: str
    ticket: SupportTicket
    knowledge_base: tuple[KnowledgeSnippet, ...]

    # --- Reference ("gold") answer ---------------------------------------
    # NOTE(review): presumably the expected final values the graders compare
    # against; only gold_queue / gold_priority comparisons are visible in the
    # environment helpers — confirm against graders.py.
    gold_queue: str
    gold_priority: str
    gold_issue_type: str
    gold_status: str
    gold_resolution_code: str

    # Fields the agent is expected to request from the customer; an empty
    # tuple means the task defines none.
    required_requested_fields: tuple[str, ...]
    # Tuples of marker groups. In the TASKS entries each inner tuple reads
    # like a set of alternative phrasings — NOTE(review): the exact matching
    # rule lives in the grader, not in this module; confirm there.
    required_reply_markers: tuple[tuple[str, ...], ...]
    required_note_markers: tuple[tuple[str, ...], ...]

    # --- Optional settings (all have defaults) ---------------------------
    forbidden_reply_markers: tuple[str, ...] = ()
    risk_flags: tuple[str, ...] = ()
    # Scripted customer follow-up for the task; "none" is the default.
    follow_up_outcome: Literal["none", "partial", "complete", "incorrect"] = "none"
    follow_up_message: str = ""
    follow_up_provided_fields: tuple[str, ...] = ()
    follow_up_wrong_fields: tuple[str, ...] = ()
    # NOTE(review): looks like SLA minutes consumed per step (tickets carry
    # sla_minutes_remaining) — confirm against the environment's step logic.
    sla_step_cost: int = 15
    # Queues that count as over-escalation for this task; the environment
    # applies a penalty when the case is in one of them and it is not
    # gold_queue.
    over_escalation_queues: tuple[str, ...] = ()
    # NOTE(review): presumably the step by which escalation must have
    # happened; None when the task sets no deadline — confirm with caller.
    under_escalation_deadline_step: int | None = None
    max_steps: int = 6
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
# Registry of every task, keyed by task_id. Dict insertion order is the
# order returned by list_task_ids(), so reordering entries changes the
# evaluation order.
TASKS: dict[str, SupportTaskSpec] = {
    # Easy: duplicate-charge refund. No customer information is requested
    # (required_requested_fields is empty) and the gold status is "resolved".
    "billing_refund_easy": SupportTaskSpec(
        task_id="billing_refund_easy",
        difficulty="easy",
        title="Duplicate charge refund triage",
        objective=(
            "Triage a duplicate-charge billing ticket, send the correct customer response, "
            "and close the case only if no further customer information is required."
        ),
        ticket=SupportTicket(
            customer_name="Riya Shah",
            customer_tier="pro",
            company="PixelNorth Studio",
            subject="Charged twice after I canceled",
            body=(
                "I canceled our Pro annual workspace yesterday, but my card was charged again "
                "this morning and I still see the old invoice. We only had one workspace, "
                "so this looks like a duplicate charge. Please fix it quickly."
            ),
            region="ap-south-1",
            affected_users=12,
            sla_minutes_remaining=240,
            business_impact="Finance ops are blocked from closing the monthly books until the duplicate invoice is fixed.",
            secondary_concerns=["The customer also wants confirmation that the canceled workspace will stay deactivated."],
            attachments=["invoice_7741.pdf"],
        ),
        knowledge_base=(
            KnowledgeSnippet(
                article_id="KB-101",
                title="Duplicate charges and same-day cancellations",
                content=(
                    "If a customer reports a duplicate charge and the subscription is already "
                    "canceled, route the ticket to billing_ops with high priority. Billing can "
                    "approve the refund immediately without requesting extra information when an "
                    "invoice is attached."
                ),
            ),
            KnowledgeSnippet(
                article_id="KB-102",
                title="Refund communication checklist",
                content=(
                    "Customer replies for approved duplicate-charge refunds must confirm that a "
                    "refund is being processed, mention the duplicate charge, and set the "
                    "expectation that funds typically appear within 5-7 business days."
                ),
            ),
            KnowledgeSnippet(
                article_id="KB-103",
                title="When to close a billing case",
                content=(
                    "Close the case as resolved only after the refund path is clear and no more "
                    "customer details are needed."
                ),
            ),
        ),
        gold_queue="billing_ops",
        gold_priority="high",
        gold_issue_type="duplicate_charge",
        gold_status="resolved",
        gold_resolution_code="refund_approved",
        required_requested_fields=(),
        required_reply_markers=(
            ("refund", "refunded", "reimburse"),
            ("duplicate charge", "charged twice", "double charge"),
            ("5-7 business days", "5 to 7 business days", "within 7 business days"),
        ),
        required_note_markers=(
            ("duplicate charge", "double charge"),
            ("refund", "refund approved"),
        ),
        forbidden_reply_markers=("chargeback", "security team"),
        risk_flags=("finance_close_risk", "avoid_unnecessary_back_and_forth"),
        over_escalation_queues=("trust_and_safety", "platform_engineering", "compliance_ops"),
        sla_step_cost=10,
        max_steps=6,
    ),
    # Medium: suspected account takeover. Three verification fields are
    # required, the scripted follow-up supplies only two of them
    # ("partial"), and the gold status stays "waiting_on_customer".
    "account_takeover_medium": SupportTaskSpec(
        task_id="account_takeover_medium",
        difficulty="medium",
        title="Suspicious login recovery triage",
        objective=(
            "Handle a potential account-compromise case, request the missing verification "
            "details, communicate safe next steps, and keep the case open until the customer replies. "
            "The agent must protect account safety without promising an unsafe immediate unlock."
        ),
        ticket=SupportTicket(
            customer_name="Marcus Lee",
            customer_tier="pro",
            company="Northline Analytics",
            subject="Locked out after strange login alert",
            body=(
                "Our workspace admin got a login alert from a country none of us have visited, "
                "and now I can't get back into the account. Please unlock it ASAP. The billing "
                "email is still ours, but I'm worried someone got in."
            ),
            region="us-east-1",
            affected_users=34,
            sla_minutes_remaining=90,
            business_impact="The admin is locked out of the analytics workspace ahead of the Monday executive review.",
            secondary_concerns=["The customer wants the account unlocked immediately, but the verification flow cannot be skipped."],
            attachments=[],
        ),
        knowledge_base=(
            KnowledgeSnippet(
                article_id="SEC-201",
                title="Account compromise routing",
                content=(
                    "Potential account-takeover reports route to trust_and_safety with urgent "
                    "priority. Do not resolve the case immediately."
                ),
            ),
            KnowledgeSnippet(
                article_id="SEC-202",
                title="Verification details before unlock",
                content=(
                    "Before access can be restored, ask the customer for the workspace_id, the "
                    "last successful login time, and the billing email on file. Keep the status "
                    "waiting_on_customer until the details arrive."
                ),
            ),
            KnowledgeSnippet(
                article_id="SEC-203",
                title="Customer response checklist",
                content=(
                    "Security replies should tell the customer to reset their password, scan "
                    "their device for malware, and explain that the trust team is reviewing the case."
                ),
            ),
        ),
        gold_queue="trust_and_safety",
        gold_priority="urgent",
        gold_issue_type="account_compromise",
        gold_status="waiting_on_customer",
        gold_resolution_code="verification_needed",
        required_requested_fields=("workspace_id", "last_successful_login", "billing_email"),
        required_reply_markers=(
            ("reset your password", "change your password"),
            ("scan", "malware", "device check"),
            ("trust team", "security team", "trust and safety"),
        ),
        required_note_markers=(
            ("suspicious login", "strange login"),
            ("locked out", "can't get back", "cannot get back"),
        ),
        risk_flags=("unsafe_unlock_request", "identity_verification_required"),
        follow_up_outcome="partial",
        follow_up_message=(
            "Customer follow-up: workspace_id=ws_9021 and billing email confirmed, "
            "but they could not provide the last successful login time yet."
        ),
        follow_up_provided_fields=("workspace_id", "billing_email"),
        sla_step_cost=18,
        under_escalation_deadline_step=2,
        max_steps=7,
    ),
    # Hard: live production incident with a secondary compliance question.
    # The scripted follow-up supplies all three requested diagnostics
    # ("complete") and the gold status is "escalated".
    "api_incident_hard": SupportTaskSpec(
        task_id="api_incident_hard",
        difficulty="hard",
        title="Production API incident escalation",
        objective=(
            "Triage a high-pressure enterprise incident, ask for the right diagnostics, notify "
            "the customer that engineering is engaged, and escalate instead of resolving. "
            "The agent must prioritize the outage over a tempting secondary compliance question."
        ),
        ticket=SupportTicket(
            customer_name="Asha Verma",
            customer_tier="enterprise",
            company="Kairo Health",
            subject="EU rollout blocked by intermittent 500s",
            body=(
                "We're launching our EU workspace tonight. Since enabling EU data residency we "
                "see intermittent HTTP 500 responses from /v1/exports in production. Our "
                "compliance lead is also asking whether this affects the audit trail, but the "
                "main issue is the outage. We need help immediately."
            ),
            region="eu-west-1",
            affected_users=1800,
            sla_minutes_remaining=25,
            business_impact="A production launch and a customer-facing compliance review are both at risk tonight if the outage persists.",
            secondary_concerns=["The compliance lead is asking whether audit trails are affected, but the live outage is the primary incident."],
            attachments=["error_screenshot.png"],
        ),
        knowledge_base=(
            KnowledgeSnippet(
                article_id="INC-301",
                title="Production availability incidents",
                content=(
                    "Any active production 5xx incident for a paying customer routes to "
                    "platform_engineering with urgent priority and should be escalated, not resolved."
                ),
            ),
            KnowledgeSnippet(
                article_id="INC-302",
                title="Minimum diagnostics for API incidents",
                content=(
                    "Before engineering can investigate, request concrete examples including "
                    "request_ids, UTC timestamps, and the affected region."
                ),
            ),
            KnowledgeSnippet(
                article_id="INC-303",
                title="Customer communication during an incident",
                content=(
                    "The reply should acknowledge an incident, say the on-call engineering team "
                    "is engaged, and ask for the diagnostics needed to speed investigation."
                ),
            ),
            KnowledgeSnippet(
                article_id="INC-304",
                title="Primary issue triage rule",
                content=(
                    "When a production outage appears alongside a secondary compliance or audit "
                    "question, resolve the live outage first and avoid treating the secondary "
                    "question as the primary queue-driving issue."
                ),
            ),
        ),
        gold_queue="platform_engineering",
        gold_priority="urgent",
        gold_issue_type="production_incident",
        gold_status="escalated",
        gold_resolution_code="incident_opened",
        required_requested_fields=("request_ids", "timestamp_utc", "region"),
        required_reply_markers=(
            ("incident", "outage", "investigating"),
            ("on-call", "engineering team", "engineering is engaged"),
            ("request id", "request_ids"),
            ("utc", "timestamp"),
        ),
        required_note_markers=(
            ("eu data residency", "eu rollout"),
            ("500", "http 500"),
            ("launch tonight", "tonight"),
        ),
        risk_flags=("sev1_launch_risk", "secondary_issue_distraction", "engineering_escalation_required"),
        follow_up_outcome="complete",
        follow_up_message=(
            "Customer follow-up: request_ids=req_991, req_998; UTC timestamps=2026-04-01T19:20Z, "
            "2026-04-01T19:27Z; region=eu-west-1."
        ),
        follow_up_provided_fields=("request_ids", "timestamp_utc", "region"),
        sla_step_cost=25,
        under_escalation_deadline_step=2,
        max_steps=8,
    ),
    # Hard: regulated export exception. platform_engineering is listed as an
    # over-escalation queue, the scripted follow-up supplies the wrong
    # details ("incorrect"), and the gold status stays "waiting_on_customer".
    "regulated_export_exception_hard": SupportTaskSpec(
        task_id="regulated_export_exception_hard",
        difficulty="hard",
        title="Regulated export exception triage",
        objective=(
            "Handle a regulated export-access request where the customer wants a manual bypass. "
            "The agent must avoid escalating to engineering just because an export error is mentioned, "
            "route to compliance operations, request the right legal details, and keep the case open "
            "until the required approvals are supplied."
        ),
        ticket=SupportTicket(
            customer_name="Dr. Elena Kovacs",
            customer_tier="enterprise",
            company="NorthSea Clinics",
            subject="Need urgent export unlock before audit review",
            body=(
                "Our audit team needs tonight's export package, but after enabling EU residency the "
                "export workflow now shows a policy block. Can you temporarily disable the restriction "
                "or give us a manual bypass just for this audit? We are under pressure and can share "
                "screenshots if needed."
            ),
            region="eu-central-1",
            affected_users=240,
            sla_minutes_remaining=55,
            business_impact="A regulated healthcare audit review may slip tonight if the export path remains blocked.",
            secondary_concerns=[
                "The customer suggests an engineering workaround, but legal approval is the real gating factor."
            ],
            attachments=["policy_block.png"],
        ),
        knowledge_base=(
            KnowledgeSnippet(
                article_id="CMP-401",
                title="Export policy exceptions",
                content=(
                    "Requests to bypass region or export restrictions route to compliance_ops. "
                    "Do not promise a manual engineering workaround before approvals are verified."
                ),
            ),
            KnowledgeSnippet(
                article_id="CMP-402",
                title="Minimum legal details for export review",
                content=(
                    "Ask for the tenant_region, dpa_amendment_id, and legal_contact_email before "
                    "compliance can review an export exception."
                ),
            ),
            KnowledgeSnippet(
                article_id="CMP-403",
                title="Customer response rules for regulated exceptions",
                content=(
                    "Replies must explain that no temporary bypass can be granted yet, mention the "
                    "compliance review, and request the required legal approval details."
                ),
            ),
        ),
        gold_queue="compliance_ops",
        gold_priority="high",
        gold_issue_type="regulated_exception",
        gold_status="waiting_on_customer",
        gold_resolution_code="legal_approval_required",
        required_requested_fields=("tenant_region", "dpa_amendment_id", "legal_contact_email"),
        required_reply_markers=(
            ("no temporary bypass", "cannot provide a bypass", "can’t provide a bypass"),
            ("compliance review", "compliance team"),
            ("tenant_region", "tenant region"),
            ("dpa_amendment_id", "dpa amendment", "amendment id"),
        ),
        required_note_markers=(
            ("audit", "audit review"),
            ("eu residency", "policy block"),
            ("manual bypass", "workaround"),
        ),
        forbidden_reply_markers=("engineering workaround", "disable the restriction", "temporary unlock approved"),
        risk_flags=("regulated_data_risk", "unsafe_shortcut_pressure", "over_escalation_risk"),
        follow_up_outcome="incorrect",
        follow_up_message=(
            "Customer follow-up: sent a screenshot and export job ID, but did not include the DPA "
            "amendment ID or legal contact."
        ),
        follow_up_wrong_fields=("screenshot", "job_id"),
        sla_step_cost=16,
        over_escalation_queues=("platform_engineering",),
        max_steps=8,
    ),
}
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
def get_task(task_id: str) -> SupportTaskSpec:
    """Look up a task definition by id.

    Args:
        task_id: Key of the task in ``TASKS``.

    Returns:
        The matching ``SupportTaskSpec``.

    Raises:
        ValueError: if ``task_id`` is not registered. The message lists the
            valid ids in sorted order and the original ``KeyError`` is
            chained as the cause.
    """

    try:
        spec = TASKS[task_id]
    except KeyError as missing:  # pragma: no cover - defensive
        known_ids = ", ".join(sorted(TASKS))
        message = f"Unknown task_id '{task_id}'. Valid task ids: {known_ids}"
        raise ValueError(message) from missing
    else:
        return spec
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
def list_task_ids() -> list[str]:
    """Return every registered task id as a new list.

    The ids come back in the order the tasks were inserted into ``TASKS``,
    which gives callers a stable evaluation order.
    """

    return [*TASKS]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|