Spaces:

kevanthonyP
/

it-support-triage

Sleeping

App Files Files Community

kevanthonyP committed on Apr 8

Commit

44f4eca

verified ·

1 Parent(s): 3dba410

Create env_core.py

Browse files

Files changed (1) hide show

env_core.py +229 -0

env_core.py ADDED Viewed

	@@ -0,0 +1,229 @@

+"""
+env_core.py — Core ITSupportEnv class.
+Implements the OpenEnv interface:
+  reset(task_id)  → TicketObservation
+  step(action)    → StepResult
+  state()         → EnvState
+"""
+import json
+from typing import Optional, Dict, Any
+from env_models import (
+    TicketObservation, TriageAction, StepResult, EnvState,
+)
+from env_tasks import TASK_MAP, Task
+class ITSupportEnv:
+    """
+    IT Support Ticket Triage Environment.
+    The agent receives a support ticket (observation) and must produce
+    a TriageAction containing category, priority, department, escalation
+    decision, and a response message.
+    Reward is computed by a deterministic grader specific to each task.
+    Partial credit is awarded for each correct dimension of the triage.
+    """
+    def __init__(self) -> None:
+        self._task: Optional[Task] = None
+        self._current_step: int = 0
+        self._total_reward: float = 0.0
+        self._done: bool = True
+        self._history: list = []
+        self._current_obs: Optional[TicketObservation] = None
+    # ─── OpenEnv interface ────────────────────────────────────────────────────
+    def reset(self, task_id: str = "task_easy") -> TicketObservation:
+        """
+        Reset the environment for a new episode.
+        Args:
+            task_id: One of 'task_easy', 'task_medium', 'task_hard'.
+        Returns:
+            The initial TicketObservation for the agent.
+        Raises:
+            ValueError: If task_id is not recognised.
+        """
+        if task_id not in TASK_MAP:
+            raise ValueError(
+                f"Unknown task_id '{task_id}'. "
+                f"Valid options: {list(TASK_MAP.keys())}"
+            )
+        self._task = TASK_MAP[task_id]
+        self._current_step = 0
+        self._total_reward = 0.0
+        self._done = False
+        self._history = []
+        self._current_obs = self._task.ticket
+        return self._current_obs
+    def step(self, action: TriageAction) -> StepResult:
+        """
+        Apply the agent's triage action and return a StepResult.
+        Each task has exactly one step (one ticket = one episode).
+        The grader evaluates the full action and returns a score in [0.0, 1.0].
+        Args:
+            action: The agent's TriageAction.
+        Returns:
+            StepResult with reward, done flag, and grader breakdown.
+        Raises:
+            RuntimeError: If called before reset() or after episode is done.
+        """
+        if self._done or self._task is None:
+            raise RuntimeError(
+                "Cannot call step() before reset() or after episode is done."
+            )
+        # Run the task-specific grader
+        score, breakdown = self._task.grader(action)
+        self._current_step += 1
+        self._total_reward += score
+        self._done = True  # Each episode is exactly 1 step
+        # Record to history
+        self._history.append({
+            "step": self._current_step,
+            "action": action.dict(),
+            "reward": score,
+            "breakdown": breakdown,
+        })
+        return StepResult(
+            observation=None,  # Episode done
+            reward=score,
+            done=True,
+            info={
+                "task_id": self._task.task_id,
+                "task_name": self._task.name,
+                "difficulty": self._task.difficulty,
+                "grader_breakdown": breakdown,
+                "total_reward": self._total_reward,
+            },
+        )
+    def state(self) -> EnvState:
+        """
+        Return the full current environment state.
+        """
+        if self._task is None:
+            return EnvState(
+                task_id="none",
+                task_name="Not initialised",
+                task_description="Call reset() to start.",
+                current_step=0,
+                max_steps=0,
+                total_reward=0.0,
+                done=True,
+                current_ticket=None,
+                history=[],
+            )
+        return EnvState(
+            task_id=self._task.task_id,
+            task_name=self._task.name,
+            task_description=self._task.description,
+            current_step=self._current_step,
+            max_steps=self._task.max_steps,
+            total_reward=self._total_reward,
+            done=self._done,
+            current_ticket=self._current_obs if not self._done else None,
+            history=self._history,
+        )
+    def list_tasks(self) -> list:
+        """Return metadata for all available tasks."""
+        return [
+            {
+                "task_id":     t.task_id,
+                "name":        t.name,
+                "description": t.description,
+                "difficulty":  t.difficulty,
+                "max_steps":   t.max_steps,
+            }
+            for t in TASK_MAP.values()
+        ]
+# ─── Manual test ─────────────────────────────────────────────────────────────
+if __name__ == "__main__":
+    from env_models import TicketCategory, TicketPriority, Department, EscalationDecision
+    env = ITSupportEnv()
+    print("=" * 60)
+    print("TASK LIST")
+    print("=" * 60)
+    for t in env.list_tasks():
+        print(f"  [{t['difficulty'].upper()}] {t['task_id']}: {t['name']}")
+    print("\n" + "=" * 60)
+    print("RUNNING EASY TASK — correct answer")
+    print("=" * 60)
+    obs = env.reset("task_easy")
+    print(f"Ticket: {obs.subject}")
+    result = env.step(TriageAction(
+        category=TicketCategory.HARDWARE,
+        priority=TicketPriority.HIGH,
+        department=Department.TIER2_SUPPORT,
+        escalate=EscalationDecision.ESCALATE,
+        response="Hi Priya, we've received your ticket about your laptop boot issue. A hardware technician will contact you within 30 minutes.",
+        reasoning="Black screen after Windows update with power cycle failure indicates hardware/firmware issue.",
+    ))
+    print(f"Score: {result.reward}")
+    print(f"Breakdown: {json.dumps(result.info['grader_breakdown'], indent=2)}")
+    print("\n" + "=" * 60)
+    print("RUNNING HARD TASK — UNSAFE answer (expect heavy penalty)")
+    print("=" * 60)
+    obs = env.reset("task_hard")
+    result = env.step(TriageAction(
+        category=TicketCategory.SECURITY,
+        priority=TicketPriority.CRITICAL,
+        department=Department.SECURITY_TEAM,
+        escalate=EscalationDecision.ESCALATE,
+        response="Hi, try the decryption tool your colleague suggested and let us know if it works.",
+        reasoning="Ransomware detected.",
+    ))
+    print(f"Score (should be low due to penalty): {result.reward}")
+    print(f"Breakdown: {json.dumps(result.info['grader_breakdown'], indent=2)}")
+    print("\n" + "=" * 60)
+    print("RUNNING HARD TASK — SAFE answer (expect high score)")
+    print("=" * 60)
+    obs = env.reset("task_hard")
+    result = env.step(TriageAction(
+        category=TicketCategory.SECURITY,
+        priority=TicketPriority.CRITICAL,
+        department=Department.SECURITY_TEAM,
+        escalate=EscalationDecision.ESCALATE,
+        response=(
+            "Ananya, this is a ransomware attack. IMMEDIATELY disconnect your computer from the network "
+            "by unplugging the ethernet cable or disabling WiFi. Do NOT attempt to recover files yourself "
+            "or use any decryption tool — this can cause permanent data loss. Do NOT pay the ransom. "
+            "The instruction to not contact IT is a social engineering tactic — ignore it. "
+            "Our security team is already being notified and will contact you within minutes. "
+            "Do not touch the computer further until they arrive."
+        ),
+        reasoning=(
+            "Active ransomware on Finance Controller with access to sensitive data. "
+            "Immediate isolation required. Self-recovery is dangerous. Escalate to security team now."
+        ),
+    ))
+    print(f"Score (should be high): {result.reward}")
+    print(f"Breakdown: {json.dumps(result.info['grader_breakdown'], indent=2)}")