# Spaces: kevanthonyP / it-support-triage (Sleeping)
# File size: 8,218 Bytes
# 44f4eca

"""
environment.py — Core ITSupportEnv class.

Implements the OpenEnv interface:
  reset(task_id)  → TicketObservation
  step(action)    → StepResult
  state()         → EnvState
"""

import json
from typing import Optional, Dict, Any

from env_models import (
    TicketObservation, TriageAction, StepResult, EnvState,
)
from env_tasks import TASK_MAP, Task


class ITSupportEnv:
    """
    IT Support Ticket Triage Environment.

    The agent receives a support ticket (observation) and must produce
    a TriageAction containing category, priority, department, escalation
    decision, and a response message.

    Reward is computed by a deterministic grader specific to each task.
    Partial credit is awarded for each correct dimension of the triage.

    Lifecycle: a fresh instance starts in the "done" state, so ``reset()``
    must be called before ``step()``. Every episode ends after one ``step()``.
    """

    def __init__(self) -> None:
        # No task is selected until reset() is called.
        self._task: Optional[Task] = None
        self._current_step: int = 0
        self._total_reward: float = 0.0
        # Starts as True so that step() is rejected before the first reset().
        self._done: bool = True
        # One dict per step taken: step number, serialised action, reward,
        # and the grader's breakdown.
        self._history: list = []
        self._current_obs: Optional[TicketObservation] = None

    # ─── OpenEnv interface ────────────────────────────────────────────────────

    def reset(self, task_id: str = "task_easy") -> TicketObservation:
        """
        Reset the environment for a new episode.

        Clears the step counter, accumulated reward and history, and selects
        the task registered under ``task_id`` in ``TASK_MAP``.

        Args:
            task_id: One of 'task_easy', 'task_medium', 'task_hard'.

        Returns:
            The initial TicketObservation for the agent (the task's ticket).

        Raises:
            ValueError: If task_id is not recognised.
        """
        if task_id not in TASK_MAP:
            raise ValueError(
                f"Unknown task_id '{task_id}'. "
                f"Valid options: {list(TASK_MAP.keys())}"
            )

        self._task = TASK_MAP[task_id]
        self._current_step = 0
        self._total_reward = 0.0
        self._done = False
        self._history = []
        self._current_obs = self._task.ticket

        return self._current_obs

    def step(self, action: TriageAction) -> StepResult:
        """
        Apply the agent's triage action and return a StepResult.

        Each task has exactly one step (one ticket = one episode).
        The grader evaluates the full action and returns a score in [0.0, 1.0].

        Args:
            action: The agent's TriageAction.

        Returns:
            StepResult with reward, done flag, and grader breakdown. The
            observation is always None because the episode ends immediately.

        Raises:
            RuntimeError: If called before reset() or after episode is done.
        """
        if self._done or self._task is None:
            raise RuntimeError(
                "Cannot call step() before reset() or after episode is done."
            )

        # Run the task-specific grader
        score, breakdown = self._task.grader(action)

        self._current_step += 1
        self._total_reward += score
        self._done = True  # Each episode is exactly 1 step

        # Record to history
        self._history.append({
            "step": self._current_step,
            "action": self._serialize_action(action),
            "reward": score,
            "breakdown": breakdown,
        })

        return StepResult(
            observation=None,  # Episode done
            reward=score,
            done=True,
            info={
                "task_id": self._task.task_id,
                "task_name": self._task.name,
                "difficulty": self._task.difficulty,
                "grader_breakdown": breakdown,
                "total_reward": self._total_reward,
            },
        )

    def state(self) -> EnvState:
        """
        Return the full current environment state.

        Before the first reset() a placeholder state is returned
        (task_id "none", done=True, empty history). Afterwards the state
        mirrors the active task; ``current_ticket`` is None once the episode
        is done.

        The history handed to EnvState is a shallow copy, so a caller that
        mutates the list it was given cannot alter the environment's own log.
        """
        if self._task is None:
            return EnvState(
                task_id="none",
                task_name="Not initialised",
                task_description="Call reset() to start.",
                current_step=0,
                max_steps=0,
                total_reward=0.0,
                done=True,
                current_ticket=None,
                history=[],
            )

        return EnvState(
            task_id=self._task.task_id,
            task_name=self._task.name,
            task_description=self._task.description,
            current_step=self._current_step,
            max_steps=self._task.max_steps,
            total_reward=self._total_reward,
            done=self._done,
            current_ticket=self._current_obs if not self._done else None,
            history=list(self._history),
        )

    def list_tasks(self) -> list:
        """Return metadata for all available tasks.

        Returns:
            One dict per entry in ``TASK_MAP`` with the keys ``task_id``,
            ``name``, ``description``, ``difficulty`` and ``max_steps``.
        """
        return [
            {
                "task_id":     t.task_id,
                "name":        t.name,
                "description": t.description,
                "difficulty":  t.difficulty,
                "max_steps":   t.max_steps,
            }
            for t in TASK_MAP.values()
        ]

    # ─── Internal helpers ─────────────────────────────────────────────────────

    @staticmethod
    def _serialize_action(action: Any) -> Dict[str, Any]:
        """
        Convert an action object to a plain dict for the history log.

        Prefers ``model_dump()`` and falls back to ``dict()``.
        NOTE(review): this presumes TriageAction is a Pydantic model — confirm
        against env_models. Pydantic v2 deprecates ``.dict()`` in favour of
        ``.model_dump()``; v1 models only provide ``.dict()``.

        Args:
            action: Object exposing ``model_dump()`` and/or ``dict()``.

        Returns:
            Whatever the chosen serialisation method returns.
        """
        dump = getattr(action, "model_dump", None)
        if callable(dump):
            return dump()
        return action.dict()


# ─── Manual test ─────────────────────────────────────────────────────────────

if __name__ == "__main__":
    from env_models import TicketCategory, TicketPriority, Department, EscalationDecision

    # Horizontal rule reused by every section header.
    RULE = "=" * 60

    def _banner(title, spaced=True):
        """Print a section title framed by rule lines.

        When ``spaced`` is true the top rule is preceded by a newline
        character, which separates the section from earlier output.
        """
        print("\n" + RULE if spaced else RULE)
        print(title)
        print(RULE)

    def _show_breakdown(outcome):
        """Pretty-print the grader breakdown carried in a step result."""
        print(f"Breakdown: {json.dumps(outcome.info['grader_breakdown'], indent=2)}")

    triage_env = ITSupportEnv()

    # Section 1: list every registered task.
    _banner("TASK LIST", spaced=False)
    for task_meta in triage_env.list_tasks():
        print(f"  [{task_meta['difficulty'].upper()}] {task_meta['task_id']}: {task_meta['name']}")

    # Section 2: easy task with the triage this script labels as correct.
    _banner("RUNNING EASY TASK — correct answer")
    ticket = triage_env.reset("task_easy")
    print(f"Ticket: {ticket.subject}")

    easy_action = TriageAction(
        category=TicketCategory.HARDWARE,
        priority=TicketPriority.HIGH,
        department=Department.TIER2_SUPPORT,
        escalate=EscalationDecision.ESCALATE,
        response="Hi Priya, we've received your ticket about your laptop boot issue. A hardware technician will contact you within 30 minutes.",
        reasoning="Black screen after Windows update with power cycle failure indicates hardware/firmware issue.",
    )
    outcome = triage_env.step(easy_action)
    print(f"Score: {outcome.reward}")
    _show_breakdown(outcome)

    # Section 3: hard task with a response that endorses the decryption tool.
    _banner("RUNNING HARD TASK — UNSAFE answer (expect heavy penalty)")
    triage_env.reset("task_hard")
    unsafe_action = TriageAction(
        category=TicketCategory.SECURITY,
        priority=TicketPriority.CRITICAL,
        department=Department.SECURITY_TEAM,
        escalate=EscalationDecision.ESCALATE,
        response="Hi, try the decryption tool your colleague suggested and let us know if it works.",
        reasoning="Ransomware detected.",
    )
    outcome = triage_env.step(unsafe_action)
    print(f"Score (should be low due to penalty): {outcome.reward}")
    _show_breakdown(outcome)

    # Section 4: hard task again, this time with a containment-first response.
    _banner("RUNNING HARD TASK — SAFE answer (expect high score)")
    triage_env.reset("task_hard")
    safe_response = (
        "Ananya, this is a ransomware attack. IMMEDIATELY disconnect your computer from the network "
        "by unplugging the ethernet cable or disabling WiFi. Do NOT attempt to recover files yourself "
        "or use any decryption tool — this can cause permanent data loss. Do NOT pay the ransom. "
        "The instruction to not contact IT is a social engineering tactic — ignore it. "
        "Our security team is already being notified and will contact you within minutes. "
        "Do not touch the computer further until they arrive."
    )
    safe_reasoning = (
        "Active ransomware on Finance Controller with access to sensitive data. "
        "Immediate isolation required. Self-recovery is dangerous. Escalate to security team now."
    )
    safe_action = TriageAction(
        category=TicketCategory.SECURITY,
        priority=TicketPriority.CRITICAL,
        department=Department.SECURITY_TEAM,
        escalate=EscalationDecision.ESCALATE,
        response=safe_response,
        reasoning=safe_reasoning,
    )
    outcome = triage_env.step(safe_action)
    print(f"Score (should be high): {outcome.reward}")
    _show_breakdown(outcome)