Spaces:

modelbuilderhq
/

HyperBrickCaseOps

Sleeping

App Files Files Community

modelbuilderhq committed 30 days ago

Commit

08bf384

verified ·

1 Parent(s): 4f129c9

Delete folder supportdesk_env with huggingface_hub

Browse files

Files changed (10) hide show

supportdesk_env/__init__.py +0 -46
supportdesk_env/client.py +0 -38
supportdesk_env/graders.py +0 -167
supportdesk_env/models.py +0 -122
supportdesk_env/openenv_compat.py +0 -76
supportdesk_env/policies.py +0 -84
supportdesk_env/server/__init__.py +0 -1
supportdesk_env/server/app.py +0 -245
supportdesk_env/server/supportdesk_environment.py +0 -545
supportdesk_env/tasks.py +0 -405

supportdesk_env/__init__.py DELETED Viewed

@@ -1,46 +0,0 @@
-"""SupportDesk OpenEnv environment package."""
-from supportdesk_env.graders import GradeBreakdown, grade_case, grade_task_id
-from supportdesk_env.models import (
-    ActionHistoryEntry,
-    KnowledgeSnippet,
-    SupportCaseProgress,
-    SupportDeskAction,
-    SupportDeskObservation,
-    SupportDeskState,
-    SupportTicket,
-)
-from supportdesk_env.policies import default_note, default_reply, heuristic_action
-from supportdesk_env.tasks import TASKS, SupportTaskSpec, get_task, list_task_ids
-try:
-    from supportdesk_env.client import SupportDeskEnv
-except ImportError:  # pragma: no cover - local unit tests can run without openenv-core
-    SupportDeskEnv = None  # type: ignore[assignment]
-try:
-    from supportdesk_env.server.supportdesk_environment import SupportDeskEnvironment
-except ImportError:  # pragma: no cover - guarded for partial local setups
-    SupportDeskEnvironment = None  # type: ignore[assignment]
-__all__ = [
-    "ActionHistoryEntry",
-    "GradeBreakdown",
-    "KnowledgeSnippet",
-    "SupportCaseProgress",
-    "SupportDeskAction",
-    "SupportDeskEnv",
-    "SupportDeskEnvironment",
-    "SupportDeskObservation",
-    "SupportDeskState",
-    "SupportTaskSpec",
-    "SupportTicket",
-    "TASKS",
-    "default_note",
-    "default_reply",
-    "get_task",
-    "grade_case",
-    "grade_task_id",
-    "heuristic_action",
-    "list_task_ids",
-]

supportdesk_env/client.py DELETED Viewed

@@ -1,38 +0,0 @@
-"""HTTP client for interacting with a deployed SupportDesk environment."""
-from __future__ import annotations
-from supportdesk_env.models import SupportDeskAction, SupportDeskObservation, SupportDeskState
-from supportdesk_env.openenv_compat import EnvClient, StepResult
-def _validate(model_cls, payload):
-    """Build a ``model_cls`` instance from ``payload``, using ``model_validate`` when the class has it."""
-    if not hasattr(model_cls, "model_validate"):
-        # No ``model_validate`` attribute: expand the payload as keyword arguments instead.
-        return model_cls(**payload)  # pragma: no cover - pydantic v1 fallback
-    return model_cls.model_validate(payload)
-class SupportDeskEnv(EnvClient[SupportDeskAction, SupportDeskObservation, SupportDeskState]):
-    """Typed client for a locally running or deployed OpenEnv server.
-    The hook methods below translate between the typed SupportDesk models and
-    the plain payloads exchanged with the server.
-    """
-    def _step_payload(self, action: SupportDeskAction) -> dict:
-        """Convert a typed action into the JSON payload expected by the server."""
-        if hasattr(action, "model_dump"):
-            return action.model_dump()
-        # Objects without ``model_dump`` are serialized through ``dict()`` instead.
-        return action.dict()
-    def _parse_state(self, payload) -> SupportDeskState:
-        """Validate a state payload into a ``SupportDeskState``."""
-        return _validate(SupportDeskState, payload)
-    def _parse_reset(self, payload) -> SupportDeskObservation:
-        """Validate a reset payload into a ``SupportDeskObservation``."""
-        return _validate(SupportDeskObservation, payload)
-    def _parse_result(self, payload) -> StepResult[SupportDeskObservation]:
-        """Turn a step response into a ``StepResult``.
-        ``payload["observation"]`` is required. Top-level ``reward`` and ``done``
-        are used when present; when a response omits them, the values carried
-        on the parsed observation are used instead of raising ``KeyError``.
-        """
-        observation = _validate(SupportDeskObservation, payload["observation"])
-        reward = payload.get("reward", getattr(observation, "reward", None))
-        done = payload.get("done", getattr(observation, "done", False))
-        # OpenEnv StepResult only accepts observation/reward/done in this runtime.
-        return StepResult(
-            observation=observation,
-            reward=reward,
-            done=done,
-        )

supportdesk_env/graders.py DELETED Viewed

@@ -1,167 +0,0 @@
-"""Deterministic graders and reward helpers for SupportDesk."""
-from __future__ import annotations
-import re
-from dataclasses import dataclass
-from supportdesk_env.models import SupportCaseProgress
-from supportdesk_env.tasks import SupportTaskSpec, get_task
-STRICT_SCORE_EPSILON = 0.01
-@dataclass(frozen=True)
-class GradeBreakdown:
-    """A scored view of how close a case is to the gold solution.
-    Built by ``grade_case``; instances are immutable because the dataclass is frozen.
-    """
-    # Weighted sum of the component scores, clamped into [epsilon, 1 - epsilon] and rounded to 4 places.
-    total_score: float
-    # Exact-match components: 1.0 when the case value equals the task's gold value, else 0.0.
-    queue_score: float
-    priority_score: float
-    issue_type_score: float
-    # Fractional components in [0, 1], rounded to 4 places by ``grade_case``.
-    requested_fields_score: float
-    reply_score: float
-    note_score: float
-    # Exact-match components for the final status and resolution code.
-    status_score: float
-    resolution_score: float
-    # Names of the components that were fully satisfied, in grading order.
-    completed_milestones: tuple[str, ...]
-def _normalize(text: str | None) -> str:
-    """Lower-case ``text`` and blank out hyphens and anything that is not a-z, 0-9 or whitespace."""
-    if text:
-        lowered = text.lower()
-        dehyphenated = lowered.replace("-", " ")
-        return re.sub(r"[^a-z0-9\s]", " ", dehyphenated)
-    # ``None`` and the empty string both normalize to the empty string.
-    return ""
-def _marker_group_score(text: str | None, marker_groups: tuple[tuple[str, ...], ...]) -> float:
-    """Return the fraction of marker groups with at least one marker found in ``text``.
-    An empty ``marker_groups`` scores 1.0; text that normalizes to nothing scores 0.0.
-    Both the text and each marker are normalized before the substring check.
-    """
-    if not marker_groups:
-        return 1.0
-    haystack = _normalize(text)
-    if not haystack:
-        return 0.0
-    satisfied = [
-        group
-        for group in marker_groups
-        if any(_normalize(marker) in haystack for marker in group)
-    ]
-    return len(satisfied) / len(marker_groups)
-def _requested_fields_score(case: SupportCaseProgress, task: SupportTaskSpec) -> float:
-    """Score requested fields as coverage of the required set minus 0.05 per extra field (capped at 0.25)."""
-    wanted = set(task.required_requested_fields)
-    asked = set(case.requested_fields)
-    if not wanted:
-        # Nothing should be requested for this task, so any request at all scores zero.
-        return 0.0 if asked else 1.0
-    if not asked:
-        return 0.0
-    coverage = len(wanted & asked) / len(wanted)
-    surplus_penalty = min(0.25, len(asked - wanted) * 0.05)
-    return max(0.0, coverage - surplus_penalty)
-def _reply_penalty(case: SupportCaseProgress, task: SupportTaskSpec) -> float:
-    """Return 0.5 when the normalized reply contains any forbidden marker, otherwise 0.0."""
-    reply_text = _normalize(case.reply)
-    if not reply_text:
-        return 0.0
-    for marker in task.forbidden_reply_markers:
-        if _normalize(marker) in reply_text:
-            return 0.5
-    return 0.0
-def _strict_open_unit_interval(score: float) -> float:
-    """Clamp ``score`` into [epsilon, 1 - epsilon] so final task scores never reach 0 or 1."""
-    raised_to_floor = max(STRICT_SCORE_EPSILON, score)
-    ceiling = 1.0 - STRICT_SCORE_EPSILON
-    return min(ceiling, raised_to_floor)
-def grade_case(task: SupportTaskSpec, case: SupportCaseProgress) -> GradeBreakdown:
-    """Grade ``case`` against the gold values on ``task`` and return the per-component breakdown.
-    Queue, priority, issue type, status and resolution code are exact-match checks.
-    Requested fields, reply and internal note are fractional. The total is the
-    weighted sum of all components, clamped strictly inside (0, 1).
-    """
-    def _exact(actual, gold) -> float:
-        return 1.0 if actual == gold else 0.0
-    queue_score = _exact(case.queue, task.gold_queue)
-    priority_score = _exact(case.priority, task.gold_priority)
-    issue_type_score = _exact(case.issue_type, task.gold_issue_type)
-    requested_fields_score = _requested_fields_score(case, task)
-    reply_marker_score = _marker_group_score(case.reply, task.required_reply_markers)
-    # Forbidden markers subtract from the reply score, which never drops below zero.
-    reply_score = max(0.0, reply_marker_score - _reply_penalty(case, task))
-    note_score = _marker_group_score(case.internal_note, task.required_note_markers)
-    status_score = _exact(case.status, task.gold_status)
-    resolution_score = _exact(case.resolution_code, task.gold_resolution_code)
-    # (milestone name, component score, weight) in grading order.
-    components = (
-        ("queue", queue_score, 0.15),
-        ("priority", priority_score, 0.10),
-        ("issue_type", issue_type_score, 0.10),
-        ("requested_fields", requested_fields_score, 0.15),
-        ("reply", reply_score, 0.25),
-        ("internal_note", note_score, 0.10),
-        ("status", status_score, 0.10),
-        ("resolution_code", resolution_score, 0.05),
-    )
-    # Accumulate left to right so the floating-point result matches a plain chained sum.
-    weighted_total = 0.0
-    for _, component_score, weight in components:
-        weighted_total += component_score * weight
-    # Exact-match components are 0.0 or 1.0, so one threshold covers every milestone.
-    milestones = tuple(
-        name for name, component_score, _ in components if component_score >= 0.99
-    )
-    return GradeBreakdown(
-        total_score=round(_strict_open_unit_interval(weighted_total), 4),
-        queue_score=queue_score,
-        priority_score=priority_score,
-        issue_type_score=issue_type_score,
-        requested_fields_score=round(requested_fields_score, 4),
-        reply_score=round(reply_score, 4),
-        note_score=round(note_score, 4),
-        status_score=status_score,
-        resolution_score=resolution_score,
-        completed_milestones=milestones,
-    )
-def grade_task_id(task_id: str, case: SupportCaseProgress) -> GradeBreakdown:
-    """Resolve ``task_id`` through ``get_task`` and grade ``case`` against that task."""
-    task_spec = get_task(task_id)
-    return grade_case(task_spec, case)
-class _TaskSpecificGrader:
-    """Importable task-specific grader wrapper for validator task discovery.
-    Subclasses only set ``task_id``; instances can be called directly or via ``grade``.
-    """
-    # Overridden by each subclass with the id of the task it grades.
-    task_id: str = ""
-    def grade(self, case: SupportCaseProgress) -> float:
-        """Return only the ``total_score`` of grading ``case`` for ``task_id``."""
-        return grade_task_id(self.task_id, case).total_score
-    def __call__(self, case: SupportCaseProgress) -> float:
-        """Make the grader callable; delegates to ``grade``."""
-        return self.grade(case)
-class BillingRefundEasyGrader(_TaskSpecificGrader):
-    """Grader bound to the ``billing_refund_easy`` task."""
-    task_id = "billing_refund_easy"
-class AccountTakeoverMediumGrader(_TaskSpecificGrader):
-    """Grader bound to the ``account_takeover_medium`` task."""
-    task_id = "account_takeover_medium"
-class ApiIncidentHardGrader(_TaskSpecificGrader):
-    """Grader bound to the ``api_incident_hard`` task."""
-    task_id = "api_incident_hard"
-class RegulatedExportExceptionHardGrader(_TaskSpecificGrader):
-    """Grader bound to the ``regulated_export_exception_hard`` task."""
-    task_id = "regulated_export_exception_hard"

supportdesk_env/models.py DELETED Viewed

@@ -1,122 +0,0 @@
-"""Typed models for the SupportDesk OpenEnv environment."""
-from __future__ import annotations
-from typing import Literal
-from pydantic import BaseModel, Field
-from supportdesk_env.openenv_compat import Action, Observation, State
-class KnowledgeSnippet(BaseModel):
-    """A policy or runbook excerpt the agent can use during triage.
-    Observations carry a list of these in their ``knowledge_base`` field.
-    """
-    # All three fields are required strings.
-    article_id: str
-    title: str
-    content: str
-class SupportTicket(BaseModel):
-    """Static task input representing the inbound support ticket."""
-    customer_name: str
-    # Validation restricts the tier to exactly these three values.
-    customer_tier: Literal["free", "pro", "enterprise"]
-    company: str
-    subject: str
-    body: str
-    region: str
-    # Optional context; each defaults to None when the ticket does not supply it.
-    affected_users: int | None = None
-    sla_minutes_remaining: int | None = None
-    business_impact: str | None = None
-    # default_factory gives every ticket its own empty list.
-    secondary_concerns: list[str] = Field(default_factory=list)
-    attachments: list[str] = Field(default_factory=list)
-class ActionHistoryEntry(BaseModel):
-    """A concise trace entry used in observations and state dumps."""
-    step: int
-    operation: str
-    summary: str
-    # Defaults to 0.0 when no reward change is recorded for the entry.
-    reward_delta: float = 0.0
-class CustomerFollowUp(BaseModel):
-    """A scripted customer response that arrives after a request for more information."""
-    # Starts as "none"; the baseline policy issues a "wait" action while this is "pending".
-    status: Literal["none", "pending", "partial", "complete", "incorrect"] = "none"
-    message: str | None = None
-    # Both lists default to empty, one fresh list per instance.
-    provided_fields: list[str] = Field(default_factory=list)
-    wrong_fields: list[str] = Field(default_factory=list)
-class SupportCaseProgress(BaseModel):
-    """Mutable case state that graders score against.
-    A freshly constructed instance has status "new" and every other field unset or empty.
-    """
-    # Classification fields, compared for equality against the task's gold values.
-    queue: str | None = None
-    priority: str | None = None
-    issue_type: str | None = None
-    status: str = "new"
-    resolution_code: str | None = None
-    # Compared as a set against the task's required fields when grading.
-    requested_fields: list[str] = Field(default_factory=list)
-    # Free text checked against the task's required (and, for replies, forbidden) markers.
-    reply: str | None = None
-    internal_note: str | None = None
-    customer_follow_up: CustomerFollowUp = Field(default_factory=CustomerFollowUp)
-class SupportDeskAction(Action):
-    """One structured action the agent can take at each step.
-    Only ``operation`` is required; every other field is optional.
-    """
-    # The action verb; validation rejects anything outside this set.
-    operation: Literal["classify", "request_info", "draft_reply", "add_internal_note", "submit", "wait"]
-    # The baseline policy sends these three with "classify".
-    queue: str | None = None
-    priority: str | None = None
-    issue_type: str | None = None
-    # The baseline policy sends these two with "submit".
-    status: str | None = None
-    resolution_code: str | None = None
-    # Sent with "request_info", "draft_reply" and "add_internal_note" respectively by the baseline policy.
-    requested_fields: list[str] = Field(default_factory=list)
-    reply: str | None = None
-    internal_note: str | None = None
-class SupportDeskObservation(Observation):
-    """Observation emitted to the agent after reset and each step."""
-    # Task identity and description.
-    task_id: str
-    difficulty: Literal["easy", "medium", "hard"]
-    objective: str
-    # Task inputs: the ticket and the knowledge excerpts supplied with it.
-    ticket: SupportTicket
-    knowledge_base: list[KnowledgeSnippet]
-    # Value lists offered to the agent for each classification field.
-    available_queues: list[str]
-    available_priorities: list[str]
-    available_statuses: list[str]
-    available_issue_types: list[str]
-    # Current case progress.
-    case: SupportCaseProgress
-    current_sla_minutes_remaining: int | None = None
-    workflow_stage: str
-    # Optional guidance and trace fields; lists default to empty.
-    required_next_actions: list[str] = Field(default_factory=list)
-    risk_flags: list[str] = Field(default_factory=list)
-    action_history: list[ActionHistoryEntry] = Field(default_factory=list)
-    feedback: str = ""
-    remaining_steps: int = 0
-class SupportDeskState(State):
-    """Current environment state returned by the OpenEnv state() API."""
-    # None until an episode id has been assigned.
-    episode_id: str | None = None
-    task_id: str
-    difficulty: Literal["easy", "medium", "hard"]
-    # Episode progress counters and flags; all default to their zero values.
-    step_count: int = 0
-    reward: float = 0.0
-    done: bool = False
-    # The server's HTTP responses report this value as the top-level "score".
-    current_score: float = 0.0
-    max_steps: int = 0
-    case: SupportCaseProgress
-    current_sla_minutes_remaining: int | None = None
-    workflow_stage: str
-    # Optional guidance and trace fields; lists default to empty.
-    required_next_actions: list[str] = Field(default_factory=list)
-    risk_flags: list[str] = Field(default_factory=list)
-    action_history: list[ActionHistoryEntry] = Field(default_factory=list)
-    completed_milestones: list[str] = Field(default_factory=list)
-    last_feedback: str = ""

supportdesk_env/openenv_compat.py DELETED Viewed

@@ -1,76 +0,0 @@
-"""Compatibility helpers for environments where openenv-core is not installed."""
-from __future__ import annotations
-from dataclasses import dataclass, field
-from typing import Any, Generic, TypeVar
-from pydantic import BaseModel
-# Type variables used by the generic fallback classes below (action, observation, state).
-A = TypeVar("A")
-O = TypeVar("O")
-S = TypeVar("S")
-# Set to False below only when neither OpenEnv import path can be loaded.
-OPENENV_AVAILABLE = True
-try:
-    # First choice: the ``openenv.core`` package layout.
-    from openenv.core.client_types import StepResult  # type: ignore
-    from openenv.core.env_client import EnvClient  # type: ignore
-    from openenv.core.env_server.interfaces import Environment  # type: ignore
-    from openenv.core.env_server.types import Action, Observation, State  # type: ignore
-    from openenv.core.env_server.types import EnvironmentMetadata  # type: ignore
-except ImportError:
-    try:
-        # Second choice: the ``openenv_core`` layout, where the client class is ``HTTPEnvClient``.
-        from openenv_core.client_types import StepResult  # type: ignore
-        from openenv_core.http_env_client import HTTPEnvClient as EnvClient  # type: ignore
-        from openenv_core.env_server.interfaces import Environment  # type: ignore
-        from openenv_core.env_server.types import Action, Observation, State  # type: ignore
-        from openenv_core.env_server.types import EnvironmentMetadata  # type: ignore
-    except ImportError:
-        # Neither layout is importable: define minimal stand-ins under the same names
-        # so the rest of the package can still be imported.
-        OPENENV_AVAILABLE = False
-        class Action(BaseModel):
-            """Fallback Action base type for local import-only workflows."""
-        class Observation(BaseModel):
-            """Fallback Observation base type for local import-only workflows."""
-            reward: float = 0.0
-            done: bool = False
-        class State(BaseModel):
-            """Fallback State base type for local import-only workflows."""
-        class Environment(Generic[A, O, S]):
-            """Minimal base class used for local unit tests and import-based demos."""
-            def __init__(self) -> None:
-                super().__init__()
-        class EnvironmentMetadata(BaseModel):
-            """Fallback metadata model used when OpenEnv is absent."""
-            name: str
-            description: str
-            readme_content: str | None = None
-            version: str | None = None
-            author: str | None = None
-        @dataclass
-        class StepResult(Generic[O]):
-            """Fallback step result for local-only client compatibility."""
-            observation: O
-            reward: float
-            done: bool
-            info: dict[str, Any] = field(default_factory=dict)
-        class EnvClient(Generic[A, O, S]):
-            """Placeholder client that fails only when actually used."""
-            # Subclassing this placeholder works; only instantiation raises.
-            def __init__(self, *args, **kwargs) -> None:
-                raise ImportError(
-                    "SupportDeskEnv requires openenv-core to be installed. "
-                    "Run `py -3 -m pip install openenv-core` to use the HTTP client."
-                )

supportdesk_env/policies.py DELETED Viewed

@@ -1,84 +0,0 @@
-"""Reusable policy helpers for local baselines and training examples."""
-from __future__ import annotations
-from supportdesk_env.models import SupportDeskAction, SupportDeskObservation
-from supportdesk_env.tasks import get_task
-def default_reply(task_id: str) -> str:
-    """Return the canned customer reply for ``task_id``; any other id gets the incident reply."""
-    replies_by_task = {
-        "billing_refund_easy": (
-            "Thanks for flagging the duplicate charge. I have started the refund for the extra "
-            "charge, and the funds usually appear within 5-7 business days."
-        ),
-        "account_takeover_medium": (
-            "We have escalated this to our trust team. Please reset your password, scan your "
-            "device for malware, and reply with your workspace_id, last successful login time, "
-            "and billing email so we can verify the account safely."
-        ),
-        "regulated_export_exception_hard": (
-            "We cannot provide a bypass or temporary unlock yet. Our compliance team is running "
-            "a compliance review, and we need your tenant_region, dpa_amendment_id, and "
-            "legal_contact_email to continue that review."
-        ),
-    }
-    # Used for every task id without its own entry above.
-    incident_reply = (
-        "We are treating this as an active incident and our on-call engineering team is engaged. "
-        "Please send the affected request IDs, UTC timestamps, and the impacted region so we can "
-        "speed up the investigation."
-    )
-    return replies_by_task.get(task_id, incident_reply)
-def default_note(task_id: str) -> str:
-    """Return the canned internal note for ``task_id``; any other id gets the incident note."""
-    notes_by_task = {
-        "billing_refund_easy": "Duplicate charge confirmed from attached invoice; refund approved.",
-        "account_takeover_medium": "Suspicious login alert reported and customer is locked out.",
-        "regulated_export_exception_hard": (
-            "Audit-driven export exception request tied to an EU residency policy block; "
-            "customer asked for a manual bypass before legal approval."
-        ),
-    }
-    # Used for every task id without its own entry above.
-    incident_note = "EU data residency rollout hit intermittent HTTP 500s and the customer launches tonight."
-    return notes_by_task.get(task_id, incident_note)
-def heuristic_action(observation: SupportDeskObservation) -> SupportDeskAction:
-    """Choose the next baseline action from the task's gold values and the case's current progress.
-    The checks run in a fixed order: classify, request info, wait for the
-    customer, draft the reply, add the internal note, then submit.
-    """
-    task_id = observation.task_id
-    gold = get_task(task_id)
-    progress = observation.case
-    # Classification is sent as one action until all three fields are set.
-    if None in (progress.queue, progress.priority, progress.issue_type):
-        return SupportDeskAction(
-            operation="classify",
-            queue=gold.gold_queue,
-            priority=gold.gold_priority,
-            issue_type=gold.gold_issue_type,
-        )
-    required_fields = gold.required_requested_fields
-    # Order-insensitive comparison: re-request until the case holds exactly the required fields.
-    if required_fields and sorted(progress.requested_fields) != sorted(required_fields):
-        return SupportDeskAction(
-            operation="request_info",
-            requested_fields=list(required_fields),
-        )
-    if progress.customer_follow_up.status == "pending":
-        return SupportDeskAction(operation="wait")
-    if not progress.reply:
-        return SupportDeskAction(operation="draft_reply", reply=default_reply(task_id))
-    if not progress.internal_note:
-        return SupportDeskAction(operation="add_internal_note", internal_note=default_note(task_id))
-    return SupportDeskAction(
-        operation="submit",
-        status=gold.gold_status,
-        resolution_code=gold.gold_resolution_code,
-    )

supportdesk_env/server/__init__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- """SupportDesk server package."""

supportdesk_env/server/app.py DELETED Viewed

@@ -1,245 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the BSD-style license found in the
-# LICENSE file in the root directory of this source tree.
-"""
-FastAPI application for the SupportDesk environment.
-This module creates an HTTP server that exposes the SupportDeskEnvironment
-over HTTP and WebSocket endpoints, compatible with EnvClient.
-Endpoints:
-    - POST /reset: Reset the environment
-    - POST /step: Execute an action
-    - GET /state: Get current environment state
-    - GET /schema: Get action/observation schemas
-    - WS /ws: WebSocket endpoint for persistent sessions
-    - GET /tasks: Get task catalog metadata
-Usage:
-    # Development (with auto-reload):
-    uvicorn supportdesk_env.server.app:app --reload --host 0.0.0.0 --port 8000
-    # Production:
-    uvicorn supportdesk_env.server.app:app --host 0.0.0.0 --port 8000 --workers 4
-    # Or run directly:
-    python -m supportdesk_env.server.app
-"""
-from __future__ import annotations
-import os
-from typing import Any
-import uvicorn
-from fastapi import Body, HTTPException
-from fastapi.routing import APIRoute
-try:
-    from openenv.core.env_server import http_server as openenv_http_server
-except ImportError:
-    try:
-        from openenv_core.env_server import http_server as openenv_http_server
-    except Exception as e:  # pragma: no cover
-        raise ImportError(
-            "openenv is required for the web interface. Install dependencies with '\n    uv sync\n'"
-        ) from e
-try:
-    from ..models import SupportDeskAction, SupportDeskObservation, SupportDeskState
-    from ..tasks import TASKS
-    from .supportdesk_environment import SupportDeskEnvironment
-except ModuleNotFoundError:
-    from supportdesk_env.models import SupportDeskAction, SupportDeskObservation, SupportDeskState
-    from supportdesk_env.tasks import TASKS
-    from supportdesk_env.server.supportdesk_environment import SupportDeskEnvironment
-# Bind the default OpenEnv /state route to the full typed state model.
-# This reassigns an attribute on the imported http_server module, so it must run before create_app().
-openenv_http_server.State = SupportDeskState
-create_app = openenv_http_server.create_app
-# Create the app with web interface and README integration.
-# The environment class itself is passed, not an instance.
-app = create_app(
-    SupportDeskEnvironment,
-    SupportDeskAction,
-    SupportDeskObservation,
-    env_name="supportdesk_env",
-    max_concurrent_envs=1,  # increase this number to allow more concurrent WebSocket sessions
-)
-# Grader reference reported for each task by the /tasks endpoint, keyed by task id.
-# NOTE(review): values look like "module:ClassName" import paths relative to the package — confirm with the consumer.
-TASK_GRADER_PATHS = {
-    "billing_refund_easy": "graders:BillingRefundEasyGrader",
-    "account_takeover_medium": "graders:AccountTakeoverMediumGrader",
-    "api_incident_hard": "graders:ApiIncidentHardGrader",
-    "regulated_export_exception_hard": "graders:RegulatedExportExceptionHardGrader",
-}
-def _replace_route(path: str, methods: set[str]) -> None:
-    """Drop every ``APIRoute`` registered at ``path`` whose methods include all of ``methods``.
-    Nothing is added here; the caller registers the replacement handler afterwards.
-    """
-    def _is_target(route) -> bool:
-        if not isinstance(route, APIRoute):
-            return False
-        if route.path != path:
-            return False
-        return methods.issubset(set(route.methods or set()))
-    app.router.routes = [route for route in app.router.routes if not _is_target(route)]
-def _score_response(env: SupportDeskEnvironment, observation: SupportDeskObservation) -> dict[str, Any]:
-    """Build the response body: the dumped observation, its reward and done flag, plus the env's score."""
-    body: dict[str, Any] = {"observation": observation.model_dump()}
-    body["reward"] = observation.reward
-    body["done"] = observation.done
-    # The score is read from the environment state, not from the observation.
-    body["score"] = env.state.current_score
-    return body
-# Remove the generated POST /reset and POST /step routes before the score-aware versions are registered.
-_replace_route("/reset", {"POST"})
-_replace_route("/step", {"POST"})
-@app.post("/reset")
-async def reset_with_score(
-    request: openenv_http_server.ResetRequest = Body(default_factory=openenv_http_server.ResetRequest),
-) -> dict[str, Any]:
-    """Reset the environment and expose the initial deterministic score at top level.
-    The request body is optional; an absent body becomes a default ``ResetRequest``.
-    """
-    # A fresh environment instance is created for this request and closed before returning.
-    env = SupportDeskEnvironment()
-    try:
-        # Forward only the fields the caller actually sent.
-        kwargs = request.model_dump(exclude_unset=True)
-        observation = env.reset(**kwargs)
-        return _score_response(env, observation)
-    finally:
-        env.close()
-@app.post("/step")
-async def step_with_score(request: openenv_http_server.StepRequest) -> dict[str, Any]:
-    """Execute a step and expose the current deterministic score at top level.
-    Raises:
-        HTTPException: 422 when the action payload fails validation.
-    """
-    action_data = request.action
-    try:
-        action = openenv_http_server.deserialize_action(action_data, SupportDeskAction)
-    except openenv_http_server.ValidationError as exc:
-        raise HTTPException(status_code=422, detail=exc.errors()) from exc
-    # A fresh environment instance is created for this request and closed before returning.
-    # NOTE(review): episode continuity presumably comes from state shared on the
-    # SupportDeskEnvironment class rather than on this instance — confirm.
-    env = SupportDeskEnvironment()
-    try:
-        # Forward the caller-supplied fields other than the action itself.
-        kwargs = request.model_dump(exclude_unset=True, exclude={"action"})
-        observation = env.step(action, **kwargs)
-        return _score_response(env, observation)
-    finally:
-        env.close()
-@app.get("/tasks")
-def list_tasks() -> dict[str, Any]:
-    """Return environment metadata plus one catalog entry for every task in ``TASKS``."""
-    catalog = []
-    for task in TASKS.values():
-        ticket = task.ticket
-        catalog.append(
-            {
-                "task_id": task.task_id,
-                "grader": TASK_GRADER_PATHS[task.task_id],
-                "title": task.title,
-                "difficulty": task.difficulty,
-                "objective": task.objective,
-                "max_steps": task.max_steps,
-                "gold_issue_type": task.gold_issue_type,
-                "gold_queue": task.gold_queue,
-                "gold_priority": task.gold_priority,
-                "ticket_context": {
-                    "customer_tier": ticket.customer_tier,
-                    "region": ticket.region,
-                    "affected_users": ticket.affected_users,
-                    "sla_minutes_remaining": ticket.sla_minutes_remaining,
-                },
-            }
-        )
-    environment_info = {
-        "name": "supportdesk_env",
-        "version": "0.1.0",
-        "grader_type": "deterministic",
-        "score_range": [0.0, 1.0],
-    }
-    return {
-        "environment": environment_info,
-        "total_tasks": len(TASKS),
-        "tasks": catalog,
-    }
-@app.get("/episodes/{episode_id}/state", response_model=SupportDeskState)
-def get_episode_state(episode_id: str) -> SupportDeskState:
-    """Optional explicit state helper for robust episode-addressable inspection.
-    Raises:
-        HTTPException: 404 when the episode lookup raises ``ValueError``.
-    """
-    try:
-        return SupportDeskEnvironment.state_for_episode(episode_id)
-    except ValueError as exc:
-        # The lookup's error message is passed through as the 404 detail.
-        raise HTTPException(status_code=404, detail=str(exc)) from exc
-@app.post("/episodes/{episode_id}/step")
-def step_episode(
-    episode_id: str,
-    payload: dict[str, Any] = Body(...),
-) -> dict[str, Any]:
-    """Optional explicit step helper that does not require sticky request context.
-    Args:
-        episode_id: Episode to step, taken from the URL path.
-        payload: JSON body with a required ``action`` object and an optional ``timeout_s``.
-    Returns:
-        The observation, reward, done flag and the episode's current score.
-    Raises:
-        HTTPException: 422 when ``action`` is missing, is not an object, or fails
-            validation; 404 when stepping or looking up the episode raises ``ValueError``.
-    """
-    action_payload = payload.get("action")
-    if not isinstance(action_payload, dict):
-        raise HTTPException(status_code=422, detail="Request body must include an 'action' object.")
-    timeout_s = payload.get("timeout_s")
-    # Validate separately from the step: validation errors are ValueError subclasses and
-    # would otherwise be reported as 404 instead of 422.
-    try:
-        action = SupportDeskAction.model_validate(action_payload)
-    except openenv_http_server.ValidationError as exc:
-        raise HTTPException(status_code=422, detail=exc.errors()) from exc
-    env = SupportDeskEnvironment()
-    try:
-        observation = env.step(action, timeout_s=timeout_s, episode_id=episode_id)
-        # Read the score before the environment is closed.
-        score = SupportDeskEnvironment.state_for_episode(episode_id).current_score
-    except ValueError as exc:
-        raise HTTPException(status_code=404, detail=str(exc)) from exc
-    finally:
-        # Close the per-request environment, as the /reset and /step handlers do.
-        env.close()
-    return {
-        "observation": observation.model_dump(),
-        "reward": observation.reward,
-        "done": observation.done,
-        "score": score,
-    }
-def main(host: str = "0.0.0.0", port: int = 8000) -> None:
-    """
-    Entry point for direct execution via uv run or python -m.
-    This function enables running the server without Docker:
-        uv run --project . server
-        uv run --project . server --port 8001
-        python -m supportdesk_env.server.app
-    Args:
-        host: Host address to bind to (default: "0.0.0.0")
-        port: Port number to listen on (default: 8000)
-    For production deployments, consider using uvicorn directly with
-    multiple workers:
-        uvicorn supportdesk_env.server.app:app --workers 4
-    """
-    # This function does not read the command line itself; callers pass host and port.
-    # The app is handed to uvicorn as an import string rather than as the app object.
-    uvicorn.run("supportdesk_env.server.app:app", host=host, port=port)
-if __name__ == "__main__":
-    # argparse is only needed when the module is run as a script.
-    import argparse
-    parser = argparse.ArgumentParser()
-    # Defaults come from the HOST and PORT environment variables when set.
-    parser.add_argument("--host", default=os.getenv("HOST", "0.0.0.0"))
-    parser.add_argument("--port", type=int, default=int(os.getenv("PORT", "8000")))
-    args = parser.parse_args()
-    main(host=args.host, port=args.port)

supportdesk_env/server/supportdesk_environment.py DELETED Viewed

@@ -1,545 +0,0 @@
-"""SupportDesk environment implementation."""
-from __future__ import annotations
-import os
-import threading
-import uuid
-from pathlib import Path
-from typing import ClassVar
-from supportdesk_env.graders import grade_case
-from supportdesk_env.models import (
-    ActionHistoryEntry,
-    CustomerFollowUp,
-    SupportCaseProgress,
-    SupportDeskAction,
-    SupportDeskObservation,
-    SupportDeskState,
-)
-from supportdesk_env.openenv_compat import Environment, EnvironmentMetadata
-from supportdesk_env.tasks import (
-    ALL_ISSUE_TYPES,
-    ALL_PRIORITIES,
-    ALL_QUEUES,
-    ALL_STATUSES,
-    SupportTaskSpec,
-    get_task,
-    list_task_ids,
-)
-class SupportDeskEnvironment(
-    Environment[SupportDeskAction, SupportDeskObservation, SupportDeskState]
-):
-    """A realistic customer support triage environment with dense rewards.
-    Episode snapshots are kept in class-level dictionaries guarded by ``_state_lock``, so any
-    instance in the process can resume an episode by id. When no id is supplied, an instance
-    continues the episode it is already bound to; an instance that has no episode yet falls
-    back to the most recently persisted episode.
-    """
-    # Guards every read/write of the shared class-level episode bookkeeping below.
-    _state_lock: ClassVar[threading.RLock] = threading.RLock()
-    # episode_id -> last persisted snapshot of that episode.
-    _episode_store: ClassVar[dict[str, SupportDeskState]] = {}
-    # episode_id -> task_id the episode was started with.
-    _episode_task_ids: ClassVar[dict[str, str]] = {}
-    # Episode most recently reset or persisted by any instance.
-    _latest_episode_id: ClassVar[str | None] = None
-    # Count of resets served without an explicit task; drives round-robin task selection.
-    _shared_reset_counter: ClassVar[int] = 0
-    # Ordering of workflow stages, used to detect forward progress between steps.
-    _STAGE_RANK: ClassVar[dict[str, int]] = {
-        "intake": 0,
-        "verification": 1,
-        "awaiting_customer": 2,
-        "follow_up_review": 3,
-        "customer_communication": 4,
-        "internal_handoff": 5,
-        "final_resolution": 6,
-        "ready_to_submit": 7,
-        "closed": 8,
-    }
-    # Action fields each operation is expected to populate; anything else counts as "mixed".
-    _ALLOWED_FIELDS: ClassVar[dict[str, frozenset[str]]] = {
-        "classify": frozenset({"queue", "priority", "issue_type"}),
-        "request_info": frozenset({"requested_fields"}),
-        "draft_reply": frozenset({"reply"}),
-        "add_internal_note": frozenset({"internal_note"}),
-        "submit": frozenset({"status", "resolution_code"}),
-        "wait": frozenset(),
-    }
-    def __init__(self, task_id: str | None = None):
-        """Create an environment bound to one task.
-        Args:
-            task_id: Task to load. Falls back to the ``SUPPORTDESK_TASK_ID`` environment
-                variable and then to the first registered task id. When neither is given,
-                ``reset()`` rotates through all registered tasks.
-        """
-        super().__init__()
-        env_task_id = os.getenv("SUPPORTDESK_TASK_ID")
-        self._explicit_task_id = task_id is not None or env_task_id is not None
-        requested_task = task_id or env_task_id or list_task_ids()[0]
-        self.task: SupportTaskSpec = get_task(requested_task)
-        self._max_steps = self.task.max_steps
-        self._step_count = 0
-        self._reward_total = 0.0
-        self._done = False
-        self._last_feedback = ""
-        self._history: list[ActionHistoryEntry] = []
-        self._case = SupportCaseProgress()
-        self._episode_id: str | None = None
-        self._current_sla_minutes_remaining = self.task.ticket.sla_minutes_remaining
-        initial_grade = grade_case(self.task, self._case)
-        self._score = initial_grade.total_score
-        self._completed_milestones = list(initial_grade.completed_milestones)
-    @classmethod
-    def _build_initial_state(cls, task: SupportTaskSpec, episode_id: str) -> SupportDeskState:
-        """Build the pristine state snapshot for a new episode of ``task``."""
-        initial_case = SupportCaseProgress()
-        initial_grade = grade_case(task, initial_case)
-        return SupportDeskState(
-            episode_id=episode_id,
-            task_id=task.task_id,
-            difficulty=task.difficulty,
-            step_count=0,
-            reward=0.0,
-            done=False,
-            current_score=initial_grade.total_score,
-            max_steps=task.max_steps,
-            case=initial_case,
-            current_sla_minutes_remaining=task.ticket.sla_minutes_remaining,
-            workflow_stage="intake",
-            required_next_actions=["classify"],
-            risk_flags=[],
-            action_history=[],
-            completed_milestones=list(initial_grade.completed_milestones),
-            last_feedback="New case loaded. Review the ticket and policy snippets before acting.",
-        )
-    @classmethod
-    def _extract_episode_id(cls, episode_id: str | None = None, **kwargs) -> str | None:
-        """Return the explicit episode id, else a non-empty string id found in ``kwargs``, else None."""
-        if episode_id:
-            return episode_id
-        for key in ("episode_id", "request_id"):
-            value = kwargs.get(key)
-            if isinstance(value, str) and value:
-                return value
-        return None
-    def _load_episode(self, episode_id: str | None = None, **kwargs) -> None:
-        """Copy a persisted episode snapshot into this instance's working fields.
-        Resolution order: explicit id (argument or kwargs), then the episode this instance is
-        already bound to, then the most recently persisted episode. Does nothing when no id
-        can be resolved.
-        Raises:
-            ValueError: If the resolved id has no persisted snapshot.
-        """
-        # Prefer this instance's own episode over the global "latest" one so that concurrent
-        # sessions do not read or mutate each other's episodes.
-        resolved_episode_id = (
-            self._extract_episode_id(episode_id, **kwargs)
-            or self._episode_id
-            or self.__class__._latest_episode_id
-        )
-        if not resolved_episode_id:
-            return
-        episode_state = self.__class__._episode_store.get(resolved_episode_id)
-        if episode_state is None:
-            raise ValueError(
-                f"Unknown episode_id '{resolved_episode_id}'. Call reset() first or provide a valid episode_id."
-            )
-        task = get_task(self.__class__._episode_task_ids.get(resolved_episode_id, episode_state.task_id))
-        self.task = task
-        self._max_steps = episode_state.max_steps
-        self._step_count = episode_state.step_count
-        self._reward_total = episode_state.reward
-        self._done = episode_state.done
-        self._last_feedback = episode_state.last_feedback
-        # Deep copies keep the stored snapshot isolated from in-progress mutations.
-        self._history = [entry.model_copy(deep=True) for entry in episode_state.action_history]
-        self._case = episode_state.case.model_copy(deep=True)
-        self._episode_id = resolved_episode_id
-        self._score = episode_state.current_score
-        self._completed_milestones = list(episode_state.completed_milestones)
-        self._current_sla_minutes_remaining = episode_state.current_sla_minutes_remaining
-    def _snapshot_state(self) -> SupportDeskState:
-        """Build a detached ``SupportDeskState`` from this instance's working fields."""
-        return SupportDeskState(
-            episode_id=self._episode_id,
-            task_id=self.task.task_id,
-            difficulty=self.task.difficulty,
-            step_count=self._step_count,
-            reward=round(self._reward_total, 4),
-            done=self._done,
-            current_score=round(self._score, 4),
-            max_steps=self._max_steps,
-            case=self._case.model_copy(deep=True),
-            current_sla_minutes_remaining=self._current_sla_minutes_remaining,
-            workflow_stage=self._workflow_stage(),
-            required_next_actions=self._required_next_actions(),
-            risk_flags=self._risk_flags(),
-            action_history=[entry.model_copy(deep=True) for entry in self._history],
-            completed_milestones=list(self._completed_milestones),
-            last_feedback=self._last_feedback,
-        )
-    def _persist_episode(self) -> None:
-        """Store the current snapshot in the shared store and mark it as the latest episode."""
-        if self._episode_id is None:
-            return
-        self.__class__._episode_store[self._episode_id] = self._snapshot_state()
-        self.__class__._episode_task_ids[self._episode_id] = self.task.task_id
-        self.__class__._latest_episode_id = self._episode_id
-    @property
-    def state(self) -> SupportDeskState:
-        """Reload the bound (or latest) episode and return a detached snapshot of it."""
-        with self.__class__._state_lock:
-            self._load_episode()
-            return self._snapshot_state()
-    def reset(
-        self,
-        seed: int | None = None,
-        episode_id: str | None = None,
-        **kwargs,
-    ) -> SupportDeskObservation:
-        """Start a new episode and return its first observation.
-        Args:
-            seed: Accepted for interface compatibility; not used here.
-            episode_id: Id for the new episode. A ``<task_id>-<8 hex chars>`` id is generated
-                when omitted. Reusing an existing id overwrites that episode.
-            **kwargs: Ignored.
-        """
-        with self.__class__._state_lock:
-            if not self._explicit_task_id:
-                # No task pinned: rotate through the registry using the shared counter.
-                task_ids = list_task_ids()
-                next_task_id = task_ids[self.__class__._shared_reset_counter % len(task_ids)]
-                self.__class__._shared_reset_counter += 1
-                self.task = get_task(next_task_id)
-                self._max_steps = self.task.max_steps
-            self._episode_id = episode_id or f"{self.task.task_id}-{uuid.uuid4().hex[:8]}"
-            initial_state = self.__class__._build_initial_state(self.task, self._episode_id)
-            self.__class__._episode_store[self._episode_id] = initial_state
-            self.__class__._episode_task_ids[self._episode_id] = self.task.task_id
-            self.__class__._latest_episode_id = self._episode_id
-            self._load_episode(self._episode_id)
-            return self._build_observation(reward=0.0, done=False)
-    def step(
-        self,
-        action: SupportDeskAction,
-        timeout_s: float | None = None,
-        episode_id: str | None = None,
-        **kwargs,
-    ) -> SupportDeskObservation:
-        """Apply one action to an episode and return the resulting observation.
-        The step reward is the change in deterministic grade plus process bonuses plus
-        (non-positive) action penalties. The episode ends on ``submit`` or when the step
-        budget is exhausted.
-        Args:
-            action: The action to apply.
-            timeout_s: Accepted for interface compatibility; not used here.
-            episode_id: Episode to act on; see ``_load_episode`` for the fallback order.
-            **kwargs: May carry a ``request_id`` used as the episode id.
-        Raises:
-            ValueError: If the resolved episode id is unknown.
-        """
-        with self.__class__._state_lock:
-            self._load_episode(episode_id, **kwargs)
-            if self._done:
-                # Finished episodes are not mutated or re-persisted; only a penalty is reported.
-                return self._build_observation(
-                    reward=-0.05,
-                    done=True,
-                    feedback="Episode already finished. Call reset() before taking more actions.",
-                )
-            previous_grade = grade_case(self.task, self._case)
-            previous_stage = self._workflow_stage()
-            self._apply_action(action)
-            self._step_count += 1
-            self._advance_external_events(action)
-            self._degrade_sla()
-            current_grade = grade_case(self.task, self._case)
-            reward = current_grade.total_score - previous_grade.total_score
-            # Bonus and penalty are computed before ``_done`` is set, so they see the
-            # pre-closure workflow stage.
-            reward += self._process_bonus(action, previous_stage, current_grade.total_score)
-            reward += self._action_penalty(
-                action,
-                current_grade.total_score,
-                previous_grade.total_score,
-            )
-            reward = round(reward, 4)
-            self._score = current_grade.total_score
-            self._completed_milestones = list(current_grade.completed_milestones)
-            if action.operation == "submit":
-                self._done = True
-                self._last_feedback = (
-                    "Case submitted. Final deterministic grade is "
-                    f"{current_grade.total_score:.2f}."
-                )
-            elif self._step_count >= self._max_steps:
-                self._done = True
-                self._last_feedback = (
-                    f"Reached max steps ({self._max_steps}). Final deterministic grade is "
-                    f"{current_grade.total_score:.2f}."
-                )
-            else:
-                self._last_feedback = self._build_feedback(current_grade, reward)
-            self._reward_total = round(self._reward_total + reward, 4)
-            self._history.append(
-                ActionHistoryEntry(
-                    step=self._step_count,
-                    operation=action.operation,
-                    summary=self._summarize_action(action),
-                    reward_delta=reward,
-                )
-            )
-            self._persist_episode()
-            return self._build_observation(reward=reward, done=self._done)
-    @classmethod
-    def state_for_episode(cls, episode_id: str) -> SupportDeskState:
-        """Return a deep copy of the persisted snapshot for ``episode_id``.
-        Raises:
-            ValueError: If no episode with that id has been persisted.
-        """
-        with cls._state_lock:
-            state = cls._episode_store.get(episode_id)
-            if state is None:
-                raise ValueError(f"Unknown episode_id '{episode_id}'. Call reset() first.")
-            return state.model_copy(deep=True)
-    def close(self) -> None:
-        """No-op close hook for compatibility with local scripts."""
-    def get_metadata(self) -> EnvironmentMetadata:
-        """Return richer metadata for docs, validators, and HF Space UI."""
-        # README.md is looked up two directories above this file's folder; it is optional.
-        readme_path = Path(__file__).resolve().parents[2] / "README.md"
-        readme_content = readme_path.read_text(encoding="utf-8") if readme_path.exists() else None
-        return EnvironmentMetadata(
-            name="supportdesk_env",
-            description=(
-                "A policy-heavy enterprise operations desk with deterministic grading, delayed "
-                "customer follow-ups, SLA pressure, escalation tradeoffs, and sharper cross-functional triage."
-            ),
-            readme_content=readme_content,
-            version="0.1.0",
-            author="HyperBrick",
-        )
-    def _apply_action(self, action: SupportDeskAction) -> None:
-        """Write the fields relevant to ``action.operation`` into the case; other fields are ignored."""
-        if action.operation == "classify":
-            if action.queue is not None:
-                self._case.queue = action.queue
-            if action.priority is not None:
-                self._case.priority = action.priority
-            if action.issue_type is not None:
-                self._case.issue_type = action.issue_type
-            return
-        if action.operation == "request_info":
-            if action.requested_fields:
-                # Requested fields accumulate across steps and are stored sorted, without duplicates.
-                merged = set(self._case.requested_fields)
-                merged.update(action.requested_fields)
-                self._case.requested_fields = sorted(merged)
-                # The first real info request starts the simulated customer follow-up, if the task has one.
-                if self.task.follow_up_outcome != "none" and self._case.customer_follow_up.status == "none":
-                    self._case.customer_follow_up = CustomerFollowUp(status="pending")
-            return
-        if action.operation == "draft_reply":
-            if action.reply is not None:
-                self._case.reply = action.reply
-            return
-        if action.operation == "add_internal_note":
-            if action.internal_note is not None:
-                self._case.internal_note = action.internal_note
-            return
-        if action.operation == "submit":
-            if action.status is not None:
-                self._case.status = action.status
-            if action.resolution_code is not None:
-                self._case.resolution_code = action.resolution_code
-    def _advance_external_events(self, action: SupportDeskAction) -> None:
-        """Deliver the task's scripted customer follow-up when the agent waits on a pending request."""
-        if self._case.customer_follow_up.status == "pending" and action.operation == "wait":
-            self._case.customer_follow_up = CustomerFollowUp(
-                status=self.task.follow_up_outcome,
-                message=self.task.follow_up_message or None,
-                provided_fields=list(self.task.follow_up_provided_fields),
-                wrong_fields=list(self.task.follow_up_wrong_fields),
-            )
-    def _degrade_sla(self) -> None:
-        """Subtract the task's per-step SLA cost from the remaining minutes, flooring at zero."""
-        if self._current_sla_minutes_remaining is None:
-            return
-        self._current_sla_minutes_remaining = max(
-            0,
-            self._current_sla_minutes_remaining - self.task.sla_step_cost,
-        )
-    def _action_penalty(
-        self,
-        action: SupportDeskAction,
-        current_score: float,
-        previous_score: float,
-    ) -> float:
-        """Return the non-positive shaping penalty for the action just applied.
-        Args:
-            action: The action that was applied this step.
-            current_score: Grade after the action.
-            previous_score: Grade before the action.
-        """
-        penalty = 0.0
-        if current_score <= previous_score:
-            penalty -= 0.03
-        penalty -= self._mixed_action_penalty(action)
-        penalty -= self._escalation_tradeoff_penalty()
-        if action.operation == "draft_reply" and not action.reply:
-            penalty -= 0.03
-        if action.operation == "request_info" and not action.requested_fields:
-            penalty -= 0.03
-        if action.operation == "add_internal_note" and not action.internal_note:
-            penalty -= 0.03
-        # Only queue/priority/issue_type are applied by a classify action, so a classify that
-        # sets none of them is empty even if it carries status or resolution_code.
-        if action.operation == "classify" and not any(
-            (action.queue, action.priority, action.issue_type)
-        ):
-            penalty -= 0.03
-        if action.operation == "wait" and self._case.customer_follow_up.status != "pending":
-            penalty -= 0.02
-        if action.operation == "submit" and self._required_next_actions():
-            penalty -= 0.08
-        if (
-            self.task.under_escalation_deadline_step is not None
-            and self._step_count >= self.task.under_escalation_deadline_step
-            and (self._case.queue != self.task.gold_queue or self._case.priority != self.task.gold_priority)
-        ):
-            penalty -= 0.04
-        if self._current_sla_minutes_remaining is not None and self._current_sla_minutes_remaining <= 15:
-            penalty -= 0.02
-        return round(penalty, 4)
-    def _build_feedback(self, grade, reward: float) -> str:
-        """Format the per-step feedback line from the current grade, reward and workflow status."""
-        sla_remaining = self._current_sla_minutes_remaining
-        sla_text = sla_remaining if sla_remaining is not None else "n/a"
-        return (
-            f"Reward delta {reward:+.2f}. Current score {grade.total_score:.2f}. "
-            f"SLA remaining: {sla_text} minutes. "
-            f"Stage: {self._workflow_stage()}. "
-            f"Customer follow-up: {self._case.customer_follow_up.status}. "
-            f"Next actions: {', '.join(self._required_next_actions()) or 'none'}. "
-            f"Completed milestones: {', '.join(grade.completed_milestones) or 'none yet'}."
-        )
-    def _summarize_action(self, action: SupportDeskAction) -> str:
-        """Return a compact ``operation | key=value | ...`` summary for the action history."""
-        parts = [action.operation]
-        if action.queue:
-            parts.append(f"queue={action.queue}")
-        if action.priority:
-            parts.append(f"priority={action.priority}")
-        if action.issue_type:
-            parts.append(f"issue_type={action.issue_type}")
-        if action.status:
-            parts.append(f"status={action.status}")
-        if action.resolution_code:
-            parts.append(f"resolution={action.resolution_code}")
-        if action.requested_fields:
-            parts.append(f"requested={','.join(action.requested_fields)}")
-        # Free-text fields are only flagged as present, not copied into the summary.
-        if action.reply:
-            parts.append("reply=yes")
-        if action.internal_note:
-            parts.append("note=yes")
-        return " | ".join(parts)
-    def _build_observation(
-        self,
-        reward: float,
-        done: bool,
-        feedback: str | None = None,
-    ) -> SupportDeskObservation:
-        """Assemble the observation for the current working state.
-        Args:
-            reward: Step reward to report.
-            done: Whether the episode is finished.
-            feedback: Overrides the stored last-feedback text when non-empty.
-        """
-        return SupportDeskObservation(
-            task_id=self.task.task_id,
-            difficulty=self.task.difficulty,
-            objective=self.task.objective,
-            ticket=self.task.ticket,
-            knowledge_base=list(self.task.knowledge_base),
-            available_queues=list(ALL_QUEUES),
-            available_priorities=list(ALL_PRIORITIES),
-            available_statuses=list(ALL_STATUSES),
-            available_issue_types=list(ALL_ISSUE_TYPES),
-            case=self._case.model_copy(deep=True),
-            current_sla_minutes_remaining=self._current_sla_minutes_remaining,
-            workflow_stage=self._workflow_stage(),
-            required_next_actions=self._required_next_actions(),
-            risk_flags=self._risk_flags(),
-            action_history=[entry.model_copy(deep=True) for entry in self._history],
-            feedback=feedback or self._last_feedback,
-            remaining_steps=max(self._max_steps - self._step_count, 0),
-            reward=reward,
-            done=done,
-        )
-    def _workflow_stage(self) -> str:
-        """Return the name of the first workflow stage whose exit condition is not yet met."""
-        if self._done:
-            return "closed"
-        if self._case.queue is None or self._case.priority is None or self._case.issue_type is None:
-            return "intake"
-        # The requested fields must match the task's required set exactly (order-insensitive).
-        if self.task.required_requested_fields and sorted(self._case.requested_fields) != sorted(self.task.required_requested_fields):
-            return "verification"
-        if self._case.customer_follow_up.status == "pending":
-            return "awaiting_customer"
-        if self._case.customer_follow_up.status in {"partial", "incorrect"}:
-            return "follow_up_review"
-        if not self._case.reply:
-            return "customer_communication"
-        if not self._case.internal_note:
-            return "internal_handoff"
-        if self._case.status != self.task.gold_status or self._case.resolution_code != self.task.gold_resolution_code:
-            return "final_resolution"
-        return "ready_to_submit"
-    def _required_next_actions(self) -> list[str]:
-        """Return the operations still outstanding; an empty list means nothing is left to do."""
-        # The first three checks are blocking: each returns a single required operation.
-        if self._case.queue is None or self._case.priority is None or self._case.issue_type is None:
-            return ["classify"]
-        if self.task.required_requested_fields and sorted(self._case.requested_fields) != sorted(self.task.required_requested_fields):
-            return ["request_info"]
-        if self._case.customer_follow_up.status == "pending":
-            return ["wait"]
-        needed: list[str] = []
-        if not self._case.reply:
-            needed.append("draft_reply")
-        if not self._case.internal_note:
-            needed.append("add_internal_note")
-        if self._case.status != self.task.gold_status or self._case.resolution_code != self.task.gold_resolution_code:
-            needed.append("submit")
-        return needed
-    def _risk_flags(self) -> list[str]:
-        """Return the sorted, de-duplicated risk flags: the task's static flags plus dynamic ones."""
-        flags = list(self.task.risk_flags)
-        if self._current_sla_minutes_remaining is not None and self._current_sla_minutes_remaining <= 30:
-            flags.append("sla_breach_risk")
-        if self.task.ticket.affected_users and self.task.ticket.affected_users >= 1000:
-            flags.append("high_customer_impact")
-        if self.task.ticket.secondary_concerns:
-            flags.append("secondary_issue_present")
-        if self._case.customer_follow_up.status == "partial":
-            flags.append("customer_reply_incomplete")
-        if self._case.customer_follow_up.status == "incorrect":
-            flags.append("customer_reply_irrelevant")
-        return sorted(set(flags))
-    def _process_bonus(
-        self,
-        action: SupportDeskAction,
-        previous_stage: str,
-        current_score: float,
-    ) -> float:
-        """Return the non-negative shaping bonus for good process on this step.
-        Args:
-            action: The action that was applied this step.
-            previous_stage: Workflow stage before the action was applied.
-            current_score: Grade after the action.
-        """
-        bonus = 0.0
-        stage_rank = self._STAGE_RANK
-        current_stage = self._workflow_stage()
-        if stage_rank.get(current_stage, 0) > stage_rank.get(previous_stage, 0):
-            bonus += 0.02
-        if action.operation == "classify" and self._step_count == 1:
-            if self._case.queue == self.task.gold_queue and self._case.priority == self.task.gold_priority:
-                bonus += 0.03
-        if action.operation == "request_info" and current_score > 0 and self.task.required_requested_fields:
-            bonus += 0.02
-        if action.operation == "wait" and self._case.customer_follow_up.status in {"partial", "complete", "incorrect"}:
-            bonus += 0.02
-        if action.operation == "submit" and not self._required_next_actions():
-            bonus += 0.03
-        if self._current_sla_minutes_remaining is not None and self._current_sla_minutes_remaining > 0:
-            if self.task.gold_priority == "urgent" and self._step_count <= 2 and self._case.queue == self.task.gold_queue:
-                bonus += 0.02
-        return round(bonus, 4)
-    def _mixed_action_penalty(self, action: SupportDeskAction) -> float:
-        """Return 0.02 per populated field that does not belong to the operation, capped at 0.06."""
-        # An unrecognised operation owns no fields, so every populated field counts as an extra.
-        allowed = self._ALLOWED_FIELDS.get(action.operation, frozenset())
-        populated_fields = {
-            "queue": action.queue,
-            "priority": action.priority,
-            "issue_type": action.issue_type,
-            "status": action.status,
-            "resolution_code": action.resolution_code,
-            "requested_fields": action.requested_fields,
-            "reply": action.reply,
-            "internal_note": action.internal_note,
-        }
-        extras = 0
-        for field_name, value in populated_fields.items():
-            if field_name in allowed:
-                continue
-            # None, empty lists and empty strings count as "not populated".
-            if value is None or (isinstance(value, (list, str)) and not value):
-                continue
-            extras += 1
-        return min(0.06, extras * 0.02)
-    def _escalation_tradeoff_penalty(self) -> float:
-        """Return 0.06 while the case sits in an over-escalation queue that is not the gold queue."""
-        penalty = 0.0
-        if self._case.queue in self.task.over_escalation_queues and self._case.queue != self.task.gold_queue:
-            penalty += 0.06
-        return round(penalty, 4)

supportdesk_env/tasks.py DELETED Viewed

@@ -1,405 +0,0 @@
-"""Task registry for the SupportDesk environment."""
-from __future__ import annotations
-from dataclasses import dataclass
-from typing import Literal
-from supportdesk_env.models import KnowledgeSnippet, SupportTicket
-# Option lists that the environment copies into each observation as the
-# available queues, priorities, statuses and issue types.
-# Queues a case can be routed to.
-ALL_QUEUES = [
-    "billing_ops",
-    "trust_and_safety",
-    "platform_engineering",
-    "compliance_ops",
-    "general_support",
-]
-# Priority labels (presumably ordered from least to most urgent; confirm against the graders).
-ALL_PRIORITIES = ["low", "normal", "high", "urgent"]
-# Case statuses.
-ALL_STATUSES = ["new", "waiting_on_customer", "resolved", "escalated"]
-# Issue categories a ticket can be classified as.
-ALL_ISSUE_TYPES = [
-    "duplicate_charge",
-    "account_compromise",
-    "production_incident",
-    "regulated_exception",
-    "general_question",
-]
-@dataclass(frozen=True)
-class SupportTaskSpec:
-    """Immutable definition of a single support triage task."""
-    # Registry key of the task (matches its key in TASKS).
-    task_id: str
-    difficulty: Literal["easy", "medium", "hard"]
-    title: str
-    # Instruction text shown to the agent in every observation.
-    objective: str
-    # The customer ticket being triaged.
-    ticket: SupportTicket
-    # Policy snippets shown alongside the ticket.
-    knowledge_base: tuple[KnowledgeSnippet, ...]
-    # Expected ("gold") routing and resolution values the case is compared against.
-    gold_queue: str
-    gold_priority: str
-    gold_issue_type: str
-    gold_status: str
-    gold_resolution_code: str
-    # Fields the agent must request from the customer; empty when none are needed.
-    required_requested_fields: tuple[str, ...]
-    # Marker groups for the reply / internal note. Each inner tuple looks like a set of
-    # alternative phrasings; presumably one hit per group is enough. Confirm in graders.
-    required_reply_markers: tuple[tuple[str, ...], ...]
-    required_note_markers: tuple[tuple[str, ...], ...]
-    # Phrases the reply should not contain (presumably penalised by the grader; confirm).
-    forbidden_reply_markers: tuple[str, ...] = ()
-    # Static risk flags always reported for this task.
-    risk_flags: tuple[str, ...] = ()
-    # Scripted customer follow-up delivered after the agent requests info and then waits;
-    # "none" disables the follow-up.
-    follow_up_outcome: Literal["none", "partial", "complete", "incorrect"] = "none"
-    follow_up_message: str = ""
-    follow_up_provided_fields: tuple[str, ...] = ()
-    follow_up_wrong_fields: tuple[str, ...] = ()
-    # SLA minutes consumed by every step.
-    sla_step_cost: int = 15
-    # Queues that incur an over-escalation penalty when chosen instead of the gold queue.
-    over_escalation_queues: tuple[str, ...] = ()
-    # Step count from which a wrong queue or priority is penalised on every step; None disables it.
-    under_escalation_deadline_step: int | None = None
-    # Step budget; the episode ends once this many steps have been taken.
-    max_steps: int = 6
-TASKS: dict[str, SupportTaskSpec] = {
-    "billing_refund_easy": SupportTaskSpec(
-        task_id="billing_refund_easy",
-        difficulty="easy",
-        title="Duplicate charge refund triage",
-        objective=(
-            "Triage a duplicate-charge billing ticket, send the correct customer response, "
-            "and close the case only if no further customer information is required."
-        ),
-        ticket=SupportTicket(
-            customer_name="Riya Shah",
-            customer_tier="pro",
-            company="PixelNorth Studio",
-            subject="Charged twice after I canceled",
-            body=(
-                "I canceled our Pro annual workspace yesterday, but my card was charged again "
-                "this morning and I still see the old invoice. We only had one workspace, "
-                "so this looks like a duplicate charge. Please fix it quickly."
-            ),
-            region="ap-south-1",
-            affected_users=12,
-            sla_minutes_remaining=240,
-            business_impact="Finance ops are blocked from closing the monthly books until the duplicate invoice is fixed.",
-            secondary_concerns=["The customer also wants confirmation that the canceled workspace will stay deactivated."],
-            attachments=["invoice_7741.pdf"],
-        ),
-        knowledge_base=(
-            KnowledgeSnippet(
-                article_id="KB-101",
-                title="Duplicate charges and same-day cancellations",
-                content=(
-                    "If a customer reports a duplicate charge and the subscription is already "
-                    "canceled, route the ticket to billing_ops with high priority. Billing can "
-                    "approve the refund immediately without requesting extra information when an "
-                    "invoice is attached."
-                ),
-            ),
-            KnowledgeSnippet(
-                article_id="KB-102",
-                title="Refund communication checklist",
-                content=(
-                    "Customer replies for approved duplicate-charge refunds must confirm that a "
-                    "refund is being processed, mention the duplicate charge, and set the "
-                    "expectation that funds typically appear within 5-7 business days."
-                ),
-            ),
-            KnowledgeSnippet(
-                article_id="KB-103",
-                title="When to close a billing case",
-                content=(
-                    "Close the case as resolved only after the refund path is clear and no more "
-                    "customer details are needed."
-                ),
-            ),
-        ),
-        gold_queue="billing_ops",
-        gold_priority="high",
-        gold_issue_type="duplicate_charge",
-        gold_status="resolved",
-        gold_resolution_code="refund_approved",
-        required_requested_fields=(),
-        required_reply_markers=(
-            ("refund", "refunded", "reimburse"),
-            ("duplicate charge", "charged twice", "double charge"),
-            ("5-7 business days", "5 to 7 business days", "within 7 business days"),
-        ),
-        required_note_markers=(
-            ("duplicate charge", "double charge"),
-            ("refund", "refund approved"),
-        ),
-        forbidden_reply_markers=("chargeback", "security team"),
-        risk_flags=("finance_close_risk", "avoid_unnecessary_back_and_forth"),
-        over_escalation_queues=("trust_and_safety", "platform_engineering", "compliance_ops"),
-        sla_step_cost=10,
-        max_steps=6,
-    ),
-    "account_takeover_medium": SupportTaskSpec(
-        task_id="account_takeover_medium",
-        difficulty="medium",
-        title="Suspicious login recovery triage",
-        objective=(
-            "Handle a potential account-compromise case, request the missing verification "
-            "details, communicate safe next steps, and keep the case open until the customer replies. "
-            "The agent must protect account safety without promising an unsafe immediate unlock."
-        ),
-        ticket=SupportTicket(
-            customer_name="Marcus Lee",
-            customer_tier="pro",
-            company="Northline Analytics",
-            subject="Locked out after strange login alert",
-            body=(
-                "Our workspace admin got a login alert from a country none of us have visited, "
-                "and now I can't get back into the account. Please unlock it ASAP. The billing "
-                "email is still ours, but I'm worried someone got in."
-            ),
-            region="us-east-1",
-            affected_users=34,
-            sla_minutes_remaining=90,
-            business_impact="The admin is locked out of the analytics workspace ahead of the Monday executive review.",
-            secondary_concerns=["The customer wants the account unlocked immediately, but the verification flow cannot be skipped."],
-            attachments=[],
-        ),
-        knowledge_base=(
-            KnowledgeSnippet(
-                article_id="SEC-201",
-                title="Account compromise routing",
-                content=(
-                    "Potential account-takeover reports route to trust_and_safety with urgent "
-                    "priority. Do not resolve the case immediately."
-                ),
-            ),
-            KnowledgeSnippet(
-                article_id="SEC-202",
-                title="Verification details before unlock",
-                content=(
-                    "Before access can be restored, ask the customer for the workspace_id, the "
-                    "last successful login time, and the billing email on file. Keep the status "
-                    "waiting_on_customer until the details arrive."
-                ),
-            ),
-            KnowledgeSnippet(
-                article_id="SEC-203",
-                title="Customer response checklist",
-                content=(
-                    "Security replies should tell the customer to reset their password, scan "
-                    "their device for malware, and explain that the trust team is reviewing the case."
-                ),
-            ),
-        ),
-        gold_queue="trust_and_safety",
-        gold_priority="urgent",
-        gold_issue_type="account_compromise",
-        gold_status="waiting_on_customer",
-        gold_resolution_code="verification_needed",
-        required_requested_fields=("workspace_id", "last_successful_login", "billing_email"),
-        required_reply_markers=(
-            ("reset your password", "change your password"),
-            ("scan", "malware", "device check"),
-            ("trust team", "security team", "trust and safety"),
-        ),
-        required_note_markers=(
-            ("suspicious login", "strange login"),
-            ("locked out", "can't get back", "cannot get back"),
-        ),
-        risk_flags=("unsafe_unlock_request", "identity_verification_required"),
-        follow_up_outcome="partial",
-        follow_up_message=(
-            "Customer follow-up: workspace_id=ws_9021 and billing email confirmed, "
-            "but they could not provide the last successful login time yet."
-        ),
-        follow_up_provided_fields=("workspace_id", "billing_email"),
-        sla_step_cost=18,
-        under_escalation_deadline_step=2,
-        max_steps=7,
-    ),
-    "api_incident_hard": SupportTaskSpec(
-        task_id="api_incident_hard",
-        difficulty="hard",
-        title="Production API incident escalation",
-        objective=(
-            "Triage a high-pressure enterprise incident, ask for the right diagnostics, notify "
-            "the customer that engineering is engaged, and escalate instead of resolving. "
-            "The agent must prioritize the outage over a tempting secondary compliance question."
-        ),
-        ticket=SupportTicket(
-            customer_name="Asha Verma",
-            customer_tier="enterprise",
-            company="Kairo Health",
-            subject="EU rollout blocked by intermittent 500s",
-            body=(
-                "We're launching our EU workspace tonight. Since enabling EU data residency we "
-                "see intermittent HTTP 500 responses from /v1/exports in production. Our "
-                "compliance lead is also asking whether this affects the audit trail, but the "
-                "main issue is the outage. We need help immediately."
-            ),
-            region="eu-west-1",
-            affected_users=1800,
-            sla_minutes_remaining=25,
-            business_impact="A production launch and a customer-facing compliance review are both at risk tonight if the outage persists.",
-            secondary_concerns=["The compliance lead is asking whether audit trails are affected, but the live outage is the primary incident."],
-            attachments=["error_screenshot.png"],
-        ),
-        knowledge_base=(
-            KnowledgeSnippet(
-                article_id="INC-301",
-                title="Production availability incidents",
-                content=(
-                    "Any active production 5xx incident for a paying customer routes to "
-                    "platform_engineering with urgent priority and should be escalated, not resolved."
-                ),
-            ),
-            KnowledgeSnippet(
-                article_id="INC-302",
-                title="Minimum diagnostics for API incidents",
-                content=(
-                    "Before engineering can investigate, request concrete examples including "
-                    "request_ids, UTC timestamps, and the affected region."
-                ),
-            ),
-            KnowledgeSnippet(
-                article_id="INC-303",
-                title="Customer communication during an incident",
-                content=(
-                    "The reply should acknowledge an incident, say the on-call engineering team "
-                    "is engaged, and ask for the diagnostics needed to speed investigation."
-                ),
-            ),
-            KnowledgeSnippet(
-                article_id="INC-304",
-                title="Primary issue triage rule",
-                content=(
-                    "When a production outage appears alongside a secondary compliance or audit "
-                    "question, resolve the live outage first and avoid treating the secondary "
-                    "question as the primary queue-driving issue."
-                ),
-            ),
-        ),
-        gold_queue="platform_engineering",
-        gold_priority="urgent",
-        gold_issue_type="production_incident",
-        gold_status="escalated",
-        gold_resolution_code="incident_opened",
-        required_requested_fields=("request_ids", "timestamp_utc", "region"),
-        required_reply_markers=(
-            ("incident", "outage", "investigating"),
-            ("on-call", "engineering team", "engineering is engaged"),
-            ("request id", "request_ids"),
-            ("utc", "timestamp"),
-        ),
-        required_note_markers=(
-            ("eu data residency", "eu rollout"),
-            ("500", "http 500"),
-            ("launch tonight", "tonight"),
-        ),
-        risk_flags=("sev1_launch_risk", "secondary_issue_distraction", "engineering_escalation_required"),
-        follow_up_outcome="complete",
-        follow_up_message=(
-            "Customer follow-up: request_ids=req_991, req_998; UTC timestamps=2026-04-01T19:20Z, "
-            "2026-04-01T19:27Z; region=eu-west-1."
-        ),
-        follow_up_provided_fields=("request_ids", "timestamp_utc", "region"),
-        sla_step_cost=25,
-        under_escalation_deadline_step=2,
-        max_steps=8,
-    ),
-    "regulated_export_exception_hard": SupportTaskSpec(
-        task_id="regulated_export_exception_hard",
-        difficulty="hard",
-        title="Regulated export exception triage",
-        objective=(
-            "Handle a regulated export-access request where the customer wants a manual bypass. "
-            "The agent must avoid escalating to engineering just because an export error is mentioned, "
-            "route to compliance operations, request the right legal details, and keep the case open "
-            "until the required approvals are supplied."
-        ),
-        ticket=SupportTicket(
-            customer_name="Dr. Elena Kovacs",
-            customer_tier="enterprise",
-            company="NorthSea Clinics",
-            subject="Need urgent export unlock before audit review",
-            body=(
-                "Our audit team needs tonight's export package, but after enabling EU residency the "
-                "export workflow now shows a policy block. Can you temporarily disable the restriction "
-                "or give us a manual bypass just for this audit? We are under pressure and can share "
-                "screenshots if needed."
-            ),
-            region="eu-central-1",
-            affected_users=240,
-            sla_minutes_remaining=55,
-            business_impact="A regulated healthcare audit review may slip tonight if the export path remains blocked.",
-            secondary_concerns=[
-                "The customer suggests an engineering workaround, but legal approval is the real gating factor."
-            ],
-            attachments=["policy_block.png"],
-        ),
-        knowledge_base=(
-            KnowledgeSnippet(
-                article_id="CMP-401",
-                title="Export policy exceptions",
-                content=(
-                    "Requests to bypass region or export restrictions route to compliance_ops. "
-                    "Do not promise a manual engineering workaround before approvals are verified."
-                ),
-            ),
-            KnowledgeSnippet(
-                article_id="CMP-402",
-                title="Minimum legal details for export review",
-                content=(
-                    "Ask for the tenant_region, dpa_amendment_id, and legal_contact_email before "
-                    "compliance can review an export exception."
-                ),
-            ),
-            KnowledgeSnippet(
-                article_id="CMP-403",
-                title="Customer response rules for regulated exceptions",
-                content=(
-                    "Replies must explain that no temporary bypass can be granted yet, mention the "
-                    "compliance review, and request the required legal approval details."
-                ),
-            ),
-        ),
-        gold_queue="compliance_ops",
-        gold_priority="high",
-        gold_issue_type="regulated_exception",
-        gold_status="waiting_on_customer",
-        gold_resolution_code="legal_approval_required",
-        required_requested_fields=("tenant_region", "dpa_amendment_id", "legal_contact_email"),
-        required_reply_markers=(
-            ("no temporary bypass", "cannot provide a bypass", "can’t provide a bypass"),
-            ("compliance review", "compliance team"),
-            ("tenant_region", "tenant region"),
-            ("dpa_amendment_id", "dpa amendment", "amendment id"),
-        ),
-        required_note_markers=(
-            ("audit", "audit review"),
-            ("eu residency", "policy block"),
-            ("manual bypass", "workaround"),
-        ),
-        forbidden_reply_markers=("engineering workaround", "disable the restriction", "temporary unlock approved"),
-        risk_flags=("regulated_data_risk", "unsafe_shortcut_pressure", "over_escalation_risk"),
-        follow_up_outcome="incorrect",
-        follow_up_message=(
-            "Customer follow-up: sent a screenshot and export job ID, but did not include the DPA "
-            "amendment ID or legal contact."
-        ),
-        follow_up_wrong_fields=("screenshot", "job_id"),
-        sla_step_cost=16,
-        over_escalation_queues=("platform_engineering",),
-        max_steps=8,
-    ),
-}
-def get_task(task_id: str) -> SupportTaskSpec:
-    """Return a task definition or raise a helpful error."""
-    try:
-        return TASKS[task_id]
-    except KeyError as exc:  # pragma: no cover - defensive
-        valid = ", ".join(sorted(TASKS))
-        raise ValueError(f"Unknown task_id '{task_id}'. Valid task ids: {valid}") from exc
-def list_task_ids() -> list[str]:
-    """List tasks in a stable evaluation order."""
-    return list(TASKS)